made handling of nul terminators in CUnicode more sane.

This commit is contained in:
crs 2002-07-24 17:22:01 +00:00
parent 6fc6805a06
commit 5fe7763d37
4 changed files with 45 additions and 57 deletions

View File

@@ -82,9 +82,8 @@ CUnicode::UTF8ToUCS2(const CString& src, bool* errors)
// default to success // default to success
resetError(errors); resetError(errors);
// get size of input string and reserve some space in output. // get size of input string and reserve some space in output
// include UTF8's nul terminator. UInt32 n = src.size();
UInt32 n = src.size() + 1;
CString dst; CString dst;
dst.reserve(2 * n); dst.reserve(2 * n);
@@ -112,9 +111,8 @@ CUnicode::UTF8ToUCS4(const CString& src, bool* errors)
// default to success // default to success
resetError(errors); resetError(errors);
// get size of input string and reserve some space in output. // get size of input string and reserve some space in output
// include UTF8's nul terminator. UInt32 n = src.size();
UInt32 n = src.size() + 1;
CString dst; CString dst;
dst.reserve(4 * n); dst.reserve(4 * n);
@@ -137,9 +135,8 @@ CUnicode::UTF8ToUTF16(const CString& src, bool* errors)
// default to success // default to success
resetError(errors); resetError(errors);
// get size of input string and reserve some space in output. // get size of input string and reserve some space in output
// include UTF8's nul terminator. UInt32 n = src.size();
UInt32 n = src.size() + 1;
CString dst; CString dst;
dst.reserve(2 * n); dst.reserve(2 * n);
@@ -176,9 +173,8 @@ CUnicode::UTF8ToUTF32(const CString& src, bool* errors)
// default to success // default to success
resetError(errors); resetError(errors);
// get size of input string and reserve some space in output. // get size of input string and reserve some space in output
// include UTF8's nul terminator. UInt32 n = src.size();
UInt32 n = src.size() + 1;
CString dst; CString dst;
dst.reserve(4 * n); dst.reserve(4 * n);
@@ -211,12 +207,13 @@ CUnicode::UTF8ToText(const CString& src, bool* errors)
// get length of multibyte string // get length of multibyte string
char mbc[MB_LEN_MAX]; char mbc[MB_LEN_MAX];
size_t mblen;
mbstate_t state; mbstate_t state;
memset(&state, 0, sizeof(state)); memset(&state, 0, sizeof(state));
size_t len = 0; size_t len = 0;
UInt32 n = size; UInt32 n = size;
for (const wchar_t* scan = tmp; n > 0; ++scan, --n) { for (const wchar_t* scan = tmp; n > 0; ++scan, --n) {
size_t mblen = wcrtomb(mbc, *scan, &state); mblen = wcrtomb(mbc, *scan, &state);
if (mblen == -1) { if (mblen == -1) {
// unconvertable character // unconvertable character
setError(errors); setError(errors);
@@ -227,21 +224,21 @@ CUnicode::UTF8ToText(const CString& src, bool* errors)
} }
} }
// check if state is in initial state. if not then count the // handle nul terminator
// bytes for returning it to the initial state. mblen = wcrtomb(mbc, L'\0', &state);
if (mbsinit(&state) == 0) { if (mblen != -1) {
len += wcrtomb(mbc, L'\0', &state) - 1; len += mblen - 1;
} }
assert(mbsinit(&state) != 0); assert(mbsinit(&state) != 0);
// allocate multibyte string // allocate multibyte string
char* mbs = new char[len + 1]; char* mbs = new char[len];
// convert to multibyte // convert to multibyte
char* dst = mbs; char* dst = mbs;
n = size; n = size;
for (const wchar_t* scan = tmp; n > 0; ++scan, --n) { for (const wchar_t* scan = tmp; n > 0; ++scan, --n) {
size_t mblen = wcrtomb(dst, *scan, &state); mblen = wcrtomb(dst, *scan, &state);
if (mblen == -1) { if (mblen == -1) {
// unconvertable character // unconvertable character
*dst++ = '?'; *dst++ = '?';
@@ -250,7 +247,11 @@ CUnicode::UTF8ToText(const CString& src, bool* errors)
dst += mblen; dst += mblen;
} }
} }
*dst++ = '\0'; mblen = wcrtomb(dst, L'\0', &state);
if (mblen != -1) {
// don't include nul terminator
dst += mblen - 1;
}
CString text(mbs, dst - mbs); CString text(mbs, dst - mbs);
// clean up // clean up
@@ -311,7 +312,7 @@ CUnicode::textToUTF8(const CString& src, bool* errors)
resetError(errors); resetError(errors);
// get length of multibyte string // get length of multibyte string
UInt32 n = src.size(); UInt32 n = src.size() + 1;
size_t len = 0; size_t len = 0;
mbstate_t state; mbstate_t state;
memset(&state, 0, sizeof(state)); memset(&state, 0, sizeof(state));
@@ -399,9 +400,7 @@ CUnicode::textToUTF8(const CString& src, bool* errors)
wchar_t* wchar_t*
CUnicode::UTF8ToWideChar(const CString& src, UInt32& size, bool* errors) CUnicode::UTF8ToWideChar(const CString& src, UInt32& size, bool* errors)
{ {
// convert to platform's wide character encoding. // convert to platform's wide character encoding
// note -- this must include a wide nul character (independent of
// the CString's nul character).
#if WINDOWS_LIKE #if WINDOWS_LIKE
CString tmp = UTF8ToUTF16(src, errors); CString tmp = UTF8ToUTF16(src, errors);
size = tmp.size() >> 1; size = tmp.size() >> 1;
@@ -442,11 +441,6 @@ CUnicode::doUCS2ToUTF8(const UInt8* data, UInt32 n, bool* errors)
toUTF8(dst, c, errors); toUTF8(dst, c, errors);
} }
// remove extra trailing nul
if (dst.size() > 0 && dst[dst.size() - 1] == '\0') {
dst.resize(dst.size() - 1);
}
return dst; return dst;
} }
@@ -463,11 +457,6 @@ CUnicode::doUCS4ToUTF8(const UInt8* data, UInt32 n, bool* errors)
toUTF8(dst, c, errors); toUTF8(dst, c, errors);
} }
// remove extra trailing nul
if (dst.size() > 0 && dst[dst.size() - 1] == '\0') {
dst.resize(dst.size() - 1);
}
return dst; return dst;
} }
@@ -510,11 +499,6 @@ CUnicode::doUTF16ToUTF8(const UInt8* data, UInt32 n, bool* errors)
} }
} }
// remove extra trailing nul
if (dst.size() > 0 && dst[dst.size() - 1] == '\0') {
dst.resize(dst.size() - 1);
}
return dst; return dst;
} }
@@ -535,11 +519,6 @@ CUnicode::doUTF32ToUTF8(const UInt8* data, UInt32 n, bool* errors)
toUTF8(dst, c, errors); toUTF8(dst, c, errors);
} }
// remove extra trailing nul
if (dst.size() > 0 && dst[dst.size() - 1] == '\0') {
dst.resize(dst.size() - 1);
}
return dst; return dst;
} }

View File

@@ -15,9 +15,7 @@ public:
// not NULL then it gets true if any characters could not be // not NULL then it gets true if any characters could not be
// encoded in the target encoding and false otherwise. note // encoded in the target encoding and false otherwise. note
// that decoding errors do not set errors to error. UTF8ToText() // that decoding errors do not set errors to error. UTF8ToText()
// converts to the current locale's (multibyte) encoding. all of // converts to the current locale's (multibyte) encoding.
// these methods include the nul terminator in the returned
// string (independent of the CString's own nul terminator).
static CString UTF8ToUCS2(const CString&, bool* errors = NULL); static CString UTF8ToUCS2(const CString&, bool* errors = NULL);
static CString UTF8ToUCS4(const CString&, bool* errors = NULL); static CString UTF8ToUCS4(const CString&, bool* errors = NULL);
static CString UTF8ToUTF16(const CString&, bool* errors = NULL); static CString UTF8ToUTF16(const CString&, bool* errors = NULL);
@@ -27,9 +25,7 @@ public:
// convert from some encoding to UTF-8. if errors is not NULL // convert from some encoding to UTF-8. if errors is not NULL
// then it gets true if any characters could not be decoded and // then it gets true if any characters could not be decoded and
// false otherwise. textToUTF8() converts from the current // false otherwise. textToUTF8() converts from the current
// locale's (multibyte) encoding. all of these methods strip // locale's (multibyte) encoding.
// a terminating nul so the returned UTF-8 string uses the
// CString's own nul terminator for termination.
static CString UCS2ToUTF8(const CString&, bool* errors = NULL); static CString UCS2ToUTF8(const CString&, bool* errors = NULL);
static CString UCS4ToUTF8(const CString&, bool* errors = NULL); static CString UCS4ToUTF8(const CString&, bool* errors = NULL);
static CString UTF16ToUTF8(const CString&, bool* errors = NULL); static CString UTF16ToUTF8(const CString&, bool* errors = NULL);
@@ -37,9 +33,10 @@ public:
static CString textToUTF8(const CString&, bool* errors = NULL); static CString textToUTF8(const CString&, bool* errors = NULL);
private: private:
// convert UTF8 to nul terminated wchar_t string (using whatever // convert UTF8 to wchar_t string (using whatever encoding is native
// encoding is native to the platform). caller must delete[] // to the platform). caller must delete[] the returned string. the
// the returned string. // string is *not* nul terminated; the length (in characters) is
// returned in size.
static wchar_t* UTF8ToWideChar(const CString&, static wchar_t* UTF8ToWideChar(const CString&,
UInt32& size, bool* errors); UInt32& size, bool* errors);

View File

@@ -24,11 +24,17 @@ CMSWindowsClipboardTextConverter::getWin32Format() const
CString CString
CMSWindowsClipboardTextConverter::doFromIClipboard(const CString& data) const CMSWindowsClipboardTextConverter::doFromIClipboard(const CString& data) const
{ {
return CUnicode::UTF8ToText(data); // convert and add nul terminator
return CUnicode::UTF8ToText(data) += '\0';
} }
CString CString
CMSWindowsClipboardTextConverter::doToIClipboard(const CString& data) const CMSWindowsClipboardTextConverter::doToIClipboard(const CString& data) const
{ {
return CUnicode::textToUTF8(data); // convert and strip nul terminator
CString dst = CUnicode::textToUTF8(data);
if (dst.size() > 0 && dst[size() - 1] == '\0') {
dst.erase(dst.size() - 1);
}
return dst;
} }

View File

@@ -24,11 +24,17 @@ CMSWindowsClipboardUTF16Converter::getWin32Format() const
CString CString
CMSWindowsClipboardUTF16Converter::doFromIClipboard(const CString& data) const CMSWindowsClipboardUTF16Converter::doFromIClipboard(const CString& data) const
{ {
return CUnicode::UTF8ToUTF16(data); // convert and add nul terminator
return CUnicode::UTF8ToUTF16(data).append(sizeof(wchar_t), 0);
} }
CString CString
CMSWindowsClipboardUTF16Converter::doToIClipboard(const CString& data) const CMSWindowsClipboardUTF16Converter::doToIClipboard(const CString& data) const
{ {
return CUnicode::UTF16ToUTF8(data); // convert and strip nul terminator
CString dst = CUnicode::UTF16ToUTF8(data);
if (dst.size() > 0 && dst[size() - 1] == '\0') {
dst.erase(dst.size() - 1);
}
return dst;
} }