diff --git a/base/CUnicode.cpp b/base/CUnicode.cpp index 183499cf..066f8e5f 100644 --- a/base/CUnicode.cpp +++ b/base/CUnicode.cpp @@ -82,9 +82,8 @@ CUnicode::UTF8ToUCS2(const CString& src, bool* errors) // default to success resetError(errors); - // get size of input string and reserve some space in output. - // include UTF8's nul terminator. - UInt32 n = src.size() + 1; + // get size of input string (nul terminator not counted) and reserve some space in output + UInt32 n = src.size(); CString dst; dst.reserve(2 * n); @@ -112,9 +111,8 @@ CUnicode::UTF8ToUCS4(const CString& src, bool* errors) // default to success resetError(errors); - // get size of input string and reserve some space in output. - // include UTF8's nul terminator. - UInt32 n = src.size() + 1; + // get size of input string (nul terminator not counted) and reserve some space in output + UInt32 n = src.size(); CString dst; dst.reserve(4 * n); @@ -137,9 +135,8 @@ CUnicode::UTF8ToUTF16(const CString& src, bool* errors) // default to success resetError(errors); - // get size of input string and reserve some space in output. - // include UTF8's nul terminator. - UInt32 n = src.size() + 1; + // get size of input string (nul terminator not counted) and reserve some space in output + UInt32 n = src.size(); CString dst; dst.reserve(2 * n); @@ -176,9 +173,8 @@ CUnicode::UTF8ToUTF32(const CString& src, bool* errors) // default to success resetError(errors); - // get size of input string and reserve some space in output. - // include UTF8's nul terminator. 
- UInt32 n = src.size() + 1; + // get size of input string (nul terminator not counted) and reserve some space in output + UInt32 n = src.size(); CString dst; dst.reserve(4 * n); @@ -211,12 +207,13 @@ CUnicode::UTF8ToText(const CString& src, bool* errors) // get length of multibyte string char mbc[MB_LEN_MAX]; + size_t mblen; mbstate_t state; memset(&state, 0, sizeof(state)); size_t len = 0; UInt32 n = size; for (const wchar_t* scan = tmp; n > 0; ++scan, --n) { - size_t mblen = wcrtomb(mbc, *scan, &state); + mblen = wcrtomb(mbc, *scan, &state); if (mblen == -1) { // unconvertable character setError(errors); @@ -227,21 +224,21 @@ CUnicode::UTF8ToText(const CString& src, bool* errors) } } - // check if state is in initial state. if not then count the - // bytes for returning it to the initial state. - if (mbsinit(&state) == 0) { - len += wcrtomb(mbc, L'\0', &state) - 1; + // handle nul terminator: count the bytes that return to the initial state but not the nul byte itself + mblen = wcrtomb(mbc, L'\0', &state); + if (mblen != -1) { + len += mblen - 1; } assert(mbsinit(&state) != 0); - // allocate multibyte string + // allocate multibyte string. len excludes the nul byte but the final wcrtomb() below still stores it, so reserve one extra byte char* mbs = new char[len + 1]; // convert to multibyte char* dst = mbs; n = size; for (const wchar_t* scan = tmp; n > 0; ++scan, --n) { - size_t mblen = wcrtomb(dst, *scan, &state); + mblen = wcrtomb(dst, *scan, &state); if (mblen == -1) { // unconvertable character *dst++ = '?'; @@ -250,7 +247,11 @@ CUnicode::UTF8ToText(const CString& src, bool* errors) dst += mblen; } } - *dst++ = '\0'; + mblen = wcrtomb(dst, L'\0', &state); + if (mblen != -1) { + // the nul byte was stored in the buffer but don't include it in the result + dst += mblen - 1; + } CString text(mbs, dst - mbs); // clean up @@ -311,7 +312,7 @@ CUnicode::textToUTF8(const CString& src, bool* errors) resetError(errors); // get length of multibyte string - UInt32 n = src.size(); + UInt32 n = src.size() + 1; size_t len = 0; mbstate_t state; memset(&state, 0, sizeof(state)); @@ -399,9 +400,7 @@ CUnicode::textToUTF8(const CString& src, bool* errors) wchar_t* CUnicode::UTF8ToWideChar(const CString& src, UInt32& size, 
bool* errors) { - // convert to platform's wide character encoding. - // note -- this must include a wide nul character (independent of - // the CString's nul character). + // convert to platform's wide character encoding. no wide nul is appended; size receives the length in characters. #if WINDOWS_LIKE CString tmp = UTF8ToUTF16(src, errors); size = tmp.size() >> 1; @@ -442,11 +441,6 @@ CUnicode::doUCS2ToUTF8(const UInt8* data, UInt32 n, bool* errors) toUTF8(dst, c, errors); } - // remove extra trailing nul - if (dst.size() > 0 && dst[dst.size() - 1] == '\0') { - dst.resize(dst.size() - 1); - } - return dst; } @@ -463,11 +457,6 @@ CUnicode::doUCS4ToUTF8(const UInt8* data, UInt32 n, bool* errors) toUTF8(dst, c, errors); } - // remove extra trailing nul - if (dst.size() > 0 && dst[dst.size() - 1] == '\0') { - dst.resize(dst.size() - 1); - } - return dst; } @@ -510,11 +499,6 @@ CUnicode::doUTF16ToUTF8(const UInt8* data, UInt32 n, bool* errors) } } - // remove extra trailing nul - if (dst.size() > 0 && dst[dst.size() - 1] == '\0') { - dst.resize(dst.size() - 1); - } - return dst; } @@ -535,11 +519,6 @@ CUnicode::doUTF32ToUTF8(const UInt8* data, UInt32 n, bool* errors) toUTF8(dst, c, errors); } - // remove extra trailing nul - if (dst.size() > 0 && dst[dst.size() - 1] == '\0') { - dst.resize(dst.size() - 1); - } - return dst; } diff --git a/base/CUnicode.h b/base/CUnicode.h index 89d67235..dc2f4df2 100644 --- a/base/CUnicode.h +++ b/base/CUnicode.h @@ -15,9 +15,7 @@ public: // not NULL then it gets true if any characters could not be // encoded in the target encoding and false otherwise. note // that decoding errors do not set errors to error. UTF8ToText() - // converts to the current locale's (multibyte) encoding. all of - // these methods include the nul terminator in the returned - // string (independent of the CString's own nul terminator). + // converts to the current locale's (multibyte) encoding. the returned strings do not include a nul terminator. 
static CString UTF8ToUCS2(const CString&, bool* errors = NULL); static CString UTF8ToUCS4(const CString&, bool* errors = NULL); static CString UTF8ToUTF16(const CString&, bool* errors = NULL); @@ -27,9 +25,7 @@ public: // convert from some encoding to UTF-8. if errors is not NULL // then it gets true if any characters could not be decoded and // false otherwise. textToUTF8() converts from the current - // locale's (multibyte) encoding. all of these methods strip - // a terminating nul so the returned UTF-8 string uses the - // CString's own nul terminator for termination. + // locale's (multibyte) encoding. a terminating nul in the input is not stripped from the result (NOTE(review): confirm for textToUTF8()). static CString UCS2ToUTF8(const CString&, bool* errors = NULL); static CString UCS4ToUTF8(const CString&, bool* errors = NULL); static CString UTF16ToUTF8(const CString&, bool* errors = NULL); @@ -37,9 +33,10 @@ public: static CString textToUTF8(const CString&, bool* errors = NULL); private: - // convert UTF8 to nul terminated wchar_t string (using whatever - // encoding is native to the platform). caller must delete[] - // the returned string. + // convert UTF8 to wchar_t string (using whatever encoding is native + // to the platform). caller must delete[] the returned string. the + // string is *not* nul terminated; the length (in characters) is + // returned in size. 
static wchar_t* UTF8ToWideChar(const CString&, UInt32& size, bool* errors); diff --git a/platform/CMSWindowsClipboardTextConverter.cpp b/platform/CMSWindowsClipboardTextConverter.cpp index 195d88a1..bde47fd4 100755 --- a/platform/CMSWindowsClipboardTextConverter.cpp +++ b/platform/CMSWindowsClipboardTextConverter.cpp @@ -24,11 +24,17 @@ CMSWindowsClipboardTextConverter::getWin32Format() const CString CMSWindowsClipboardTextConverter::doFromIClipboard(const CString& data) const { - return CUnicode::UTF8ToText(data); + // convert and add nul terminator + return CUnicode::UTF8ToText(data) += '\0'; } CString CMSWindowsClipboardTextConverter::doToIClipboard(const CString& data) const { - return CUnicode::textToUTF8(data); + // convert and strip the trailing nul terminator, if any + CString dst = CUnicode::textToUTF8(data); + if (dst.size() > 0 && dst[dst.size() - 1] == '\0') { + dst.erase(dst.size() - 1); + } + return dst; } diff --git a/platform/CMSWindowsClipboardUTF16Converter.cpp b/platform/CMSWindowsClipboardUTF16Converter.cpp index 3c9da906..ab314f0c 100755 --- a/platform/CMSWindowsClipboardUTF16Converter.cpp +++ b/platform/CMSWindowsClipboardUTF16Converter.cpp @@ -24,11 +24,17 @@ CMSWindowsClipboardUTF16Converter::getWin32Format() const CString CMSWindowsClipboardUTF16Converter::doFromIClipboard(const CString& data) const { - return CUnicode::UTF8ToUTF16(data); + // convert and add a wide nul terminator (sizeof(wchar_t) zero bytes) + return CUnicode::UTF8ToUTF16(data).append(sizeof(wchar_t), 0); } CString CMSWindowsClipboardUTF16Converter::doToIClipboard(const CString& data) const { - return CUnicode::UTF16ToUTF8(data); + // convert and strip the trailing nul terminator, if any + CString dst = CUnicode::UTF16ToUTF8(data); + if (dst.size() > 0 && dst[dst.size() - 1] == '\0') { + dst.erase(dst.size() - 1); + } + return dst; }