made handling of nul terminators in CUnicode more sane.
This commit is contained in:
parent
6fc6805a06
commit
5fe7763d37
|
@ -82,9 +82,8 @@ CUnicode::UTF8ToUCS2(const CString& src, bool* errors)
|
||||||
// default to success
|
// default to success
|
||||||
resetError(errors);
|
resetError(errors);
|
||||||
|
|
||||||
// get size of input string and reserve some space in output.
|
// get size of input string and reserve some space in output
|
||||||
// include UTF8's nul terminator.
|
UInt32 n = src.size();
|
||||||
UInt32 n = src.size() + 1;
|
|
||||||
CString dst;
|
CString dst;
|
||||||
dst.reserve(2 * n);
|
dst.reserve(2 * n);
|
||||||
|
|
||||||
|
@ -112,9 +111,8 @@ CUnicode::UTF8ToUCS4(const CString& src, bool* errors)
|
||||||
// default to success
|
// default to success
|
||||||
resetError(errors);
|
resetError(errors);
|
||||||
|
|
||||||
// get size of input string and reserve some space in output.
|
// get size of input string and reserve some space in output
|
||||||
// include UTF8's nul terminator.
|
UInt32 n = src.size();
|
||||||
UInt32 n = src.size() + 1;
|
|
||||||
CString dst;
|
CString dst;
|
||||||
dst.reserve(4 * n);
|
dst.reserve(4 * n);
|
||||||
|
|
||||||
|
@ -137,9 +135,8 @@ CUnicode::UTF8ToUTF16(const CString& src, bool* errors)
|
||||||
// default to success
|
// default to success
|
||||||
resetError(errors);
|
resetError(errors);
|
||||||
|
|
||||||
// get size of input string and reserve some space in output.
|
// get size of input string and reserve some space in output
|
||||||
// include UTF8's nul terminator.
|
UInt32 n = src.size();
|
||||||
UInt32 n = src.size() + 1;
|
|
||||||
CString dst;
|
CString dst;
|
||||||
dst.reserve(2 * n);
|
dst.reserve(2 * n);
|
||||||
|
|
||||||
|
@ -176,9 +173,8 @@ CUnicode::UTF8ToUTF32(const CString& src, bool* errors)
|
||||||
// default to success
|
// default to success
|
||||||
resetError(errors);
|
resetError(errors);
|
||||||
|
|
||||||
// get size of input string and reserve some space in output.
|
// get size of input string and reserve some space in output
|
||||||
// include UTF8's nul terminator.
|
UInt32 n = src.size();
|
||||||
UInt32 n = src.size() + 1;
|
|
||||||
CString dst;
|
CString dst;
|
||||||
dst.reserve(4 * n);
|
dst.reserve(4 * n);
|
||||||
|
|
||||||
|
@ -211,12 +207,13 @@ CUnicode::UTF8ToText(const CString& src, bool* errors)
|
||||||
|
|
||||||
// get length of multibyte string
|
// get length of multibyte string
|
||||||
char mbc[MB_LEN_MAX];
|
char mbc[MB_LEN_MAX];
|
||||||
|
size_t mblen;
|
||||||
mbstate_t state;
|
mbstate_t state;
|
||||||
memset(&state, 0, sizeof(state));
|
memset(&state, 0, sizeof(state));
|
||||||
size_t len = 0;
|
size_t len = 0;
|
||||||
UInt32 n = size;
|
UInt32 n = size;
|
||||||
for (const wchar_t* scan = tmp; n > 0; ++scan, --n) {
|
for (const wchar_t* scan = tmp; n > 0; ++scan, --n) {
|
||||||
size_t mblen = wcrtomb(mbc, *scan, &state);
|
mblen = wcrtomb(mbc, *scan, &state);
|
||||||
if (mblen == -1) {
|
if (mblen == -1) {
|
||||||
// unconvertable character
|
// unconvertable character
|
||||||
setError(errors);
|
setError(errors);
|
||||||
|
@ -227,21 +224,21 @@ CUnicode::UTF8ToText(const CString& src, bool* errors)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if state is in initial state. if not then count the
|
// handle nul terminator
|
||||||
// bytes for returning it to the initial state.
|
mblen = wcrtomb(mbc, L'\0', &state);
|
||||||
if (mbsinit(&state) == 0) {
|
if (mblen != -1) {
|
||||||
len += wcrtomb(mbc, L'\0', &state) - 1;
|
len += mblen - 1;
|
||||||
}
|
}
|
||||||
assert(mbsinit(&state) != 0);
|
assert(mbsinit(&state) != 0);
|
||||||
|
|
||||||
// allocate multibyte string
|
// allocate multibyte string
|
||||||
char* mbs = new char[len + 1];
|
char* mbs = new char[len + 1];
|
||||||
|
|
||||||
// convert to multibyte
|
// convert to multibyte
|
||||||
char* dst = mbs;
|
char* dst = mbs;
|
||||||
n = size;
|
n = size;
|
||||||
for (const wchar_t* scan = tmp; n > 0; ++scan, --n) {
|
for (const wchar_t* scan = tmp; n > 0; ++scan, --n) {
|
||||||
size_t mblen = wcrtomb(dst, *scan, &state);
|
mblen = wcrtomb(dst, *scan, &state);
|
||||||
if (mblen == -1) {
|
if (mblen == -1) {
|
||||||
// unconvertable character
|
// unconvertable character
|
||||||
*dst++ = '?';
|
*dst++ = '?';
|
||||||
|
@ -250,7 +247,11 @@ CUnicode::UTF8ToText(const CString& src, bool* errors)
|
||||||
dst += mblen;
|
dst += mblen;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*dst++ = '\0';
|
mblen = wcrtomb(dst, L'\0', &state);
|
||||||
|
if (mblen != -1) {
|
||||||
|
// don't include nul terminator
|
||||||
|
dst += mblen - 1;
|
||||||
|
}
|
||||||
CString text(mbs, dst - mbs);
|
CString text(mbs, dst - mbs);
|
||||||
|
|
||||||
// clean up
|
// clean up
|
||||||
|
@ -311,7 +312,7 @@ CUnicode::textToUTF8(const CString& src, bool* errors)
|
||||||
resetError(errors);
|
resetError(errors);
|
||||||
|
|
||||||
// get length of multibyte string
|
// get length of multibyte string
|
||||||
UInt32 n = src.size();
|
UInt32 n = src.size() + 1;
|
||||||
size_t len = 0;
|
size_t len = 0;
|
||||||
mbstate_t state;
|
mbstate_t state;
|
||||||
memset(&state, 0, sizeof(state));
|
memset(&state, 0, sizeof(state));
|
||||||
|
@ -399,9 +400,7 @@ CUnicode::textToUTF8(const CString& src, bool* errors)
|
||||||
wchar_t*
|
wchar_t*
|
||||||
CUnicode::UTF8ToWideChar(const CString& src, UInt32& size, bool* errors)
|
CUnicode::UTF8ToWideChar(const CString& src, UInt32& size, bool* errors)
|
||||||
{
|
{
|
||||||
// convert to platform's wide character encoding.
|
// convert to platform's wide character encoding
|
||||||
// note -- this must include a wide nul character (independent of
|
|
||||||
// the CString's nul character).
|
|
||||||
#if WINDOWS_LIKE
|
#if WINDOWS_LIKE
|
||||||
CString tmp = UTF8ToUTF16(src, errors);
|
CString tmp = UTF8ToUTF16(src, errors);
|
||||||
size = tmp.size() >> 1;
|
size = tmp.size() >> 1;
|
||||||
|
@ -442,11 +441,6 @@ CUnicode::doUCS2ToUTF8(const UInt8* data, UInt32 n, bool* errors)
|
||||||
toUTF8(dst, c, errors);
|
toUTF8(dst, c, errors);
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove extra trailing nul
|
|
||||||
if (dst.size() > 0 && dst[dst.size() - 1] == '\0') {
|
|
||||||
dst.resize(dst.size() - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
return dst;
|
return dst;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -463,11 +457,6 @@ CUnicode::doUCS4ToUTF8(const UInt8* data, UInt32 n, bool* errors)
|
||||||
toUTF8(dst, c, errors);
|
toUTF8(dst, c, errors);
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove extra trailing nul
|
|
||||||
if (dst.size() > 0 && dst[dst.size() - 1] == '\0') {
|
|
||||||
dst.resize(dst.size() - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
return dst;
|
return dst;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -510,11 +499,6 @@ CUnicode::doUTF16ToUTF8(const UInt8* data, UInt32 n, bool* errors)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove extra trailing nul
|
|
||||||
if (dst.size() > 0 && dst[dst.size() - 1] == '\0') {
|
|
||||||
dst.resize(dst.size() - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
return dst;
|
return dst;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -535,11 +519,6 @@ CUnicode::doUTF32ToUTF8(const UInt8* data, UInt32 n, bool* errors)
|
||||||
toUTF8(dst, c, errors);
|
toUTF8(dst, c, errors);
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove extra trailing nul
|
|
||||||
if (dst.size() > 0 && dst[dst.size() - 1] == '\0') {
|
|
||||||
dst.resize(dst.size() - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
return dst;
|
return dst;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -15,9 +15,7 @@ public:
|
||||||
// not NULL then it gets true if any characters could not be
|
// not NULL then it gets true if any characters could not be
|
||||||
// encoded in the target encoding and false otherwise. note
|
// encoded in the target encoding and false otherwise. note
|
||||||
// that decoding errors do not set errors to error. UTF8ToText()
|
// that decoding errors do not set errors to error. UTF8ToText()
|
||||||
// converts to the current locale's (multibyte) encoding. all of
|
// converts to the current locale's (multibyte) encoding.
|
||||||
// these methods include the nul terminator in the returned
|
|
||||||
// string (independent of the CString's own nul terminator).
|
|
||||||
static CString UTF8ToUCS2(const CString&, bool* errors = NULL);
|
static CString UTF8ToUCS2(const CString&, bool* errors = NULL);
|
||||||
static CString UTF8ToUCS4(const CString&, bool* errors = NULL);
|
static CString UTF8ToUCS4(const CString&, bool* errors = NULL);
|
||||||
static CString UTF8ToUTF16(const CString&, bool* errors = NULL);
|
static CString UTF8ToUTF16(const CString&, bool* errors = NULL);
|
||||||
|
@ -27,9 +25,7 @@ public:
|
||||||
// convert from some encoding to UTF-8. if errors is not NULL
|
// convert from some encoding to UTF-8. if errors is not NULL
|
||||||
// then it gets true if any characters could not be decoded and
|
// then it gets true if any characters could not be decoded and
|
||||||
// false otherwise. textToUTF8() converts from the current
|
// false otherwise. textToUTF8() converts from the current
|
||||||
// locale's (multibyte) encoding. all of these methods strip
|
// locale's (multibyte) encoding.
|
||||||
// a terminating nul so the returned UTF-8 string uses the
|
|
||||||
// CString's own nul terminator for termination.
|
|
||||||
static CString UCS2ToUTF8(const CString&, bool* errors = NULL);
|
static CString UCS2ToUTF8(const CString&, bool* errors = NULL);
|
||||||
static CString UCS4ToUTF8(const CString&, bool* errors = NULL);
|
static CString UCS4ToUTF8(const CString&, bool* errors = NULL);
|
||||||
static CString UTF16ToUTF8(const CString&, bool* errors = NULL);
|
static CString UTF16ToUTF8(const CString&, bool* errors = NULL);
|
||||||
|
@ -37,9 +33,10 @@ public:
|
||||||
static CString textToUTF8(const CString&, bool* errors = NULL);
|
static CString textToUTF8(const CString&, bool* errors = NULL);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// convert UTF8 to nul terminated wchar_t string (using whatever
|
// convert UTF8 to wchar_t string (using whatever encoding is native
|
||||||
// encoding is native to the platform). caller must delete[]
|
// to the platform). caller must delete[] the returned string. the
|
||||||
// the returned string.
|
// string is *not* nul terminated; the length (in characters) is
|
||||||
|
// returned in size.
|
||||||
static wchar_t* UTF8ToWideChar(const CString&,
|
static wchar_t* UTF8ToWideChar(const CString&,
|
||||||
UInt32& size, bool* errors);
|
UInt32& size, bool* errors);
|
||||||
|
|
||||||
|
|
|
@ -24,11 +24,17 @@ CMSWindowsClipboardTextConverter::getWin32Format() const
|
||||||
CString
|
CString
|
||||||
CMSWindowsClipboardTextConverter::doFromIClipboard(const CString& data) const
|
CMSWindowsClipboardTextConverter::doFromIClipboard(const CString& data) const
|
||||||
{
|
{
|
||||||
return CUnicode::UTF8ToText(data);
|
// convert and add nul terminator
|
||||||
|
return CUnicode::UTF8ToText(data) += '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
CString
|
CString
|
||||||
CMSWindowsClipboardTextConverter::doToIClipboard(const CString& data) const
|
CMSWindowsClipboardTextConverter::doToIClipboard(const CString& data) const
|
||||||
{
|
{
|
||||||
return CUnicode::textToUTF8(data);
|
// convert and strip nul terminator
|
||||||
|
CString dst = CUnicode::textToUTF8(data);
|
||||||
|
if (dst.size() > 0 && dst[dst.size() - 1] == '\0') {
|
||||||
|
dst.erase(dst.size() - 1);
|
||||||
|
}
|
||||||
|
return dst;
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,11 +24,17 @@ CMSWindowsClipboardUTF16Converter::getWin32Format() const
|
||||||
CString
|
CString
|
||||||
CMSWindowsClipboardUTF16Converter::doFromIClipboard(const CString& data) const
|
CMSWindowsClipboardUTF16Converter::doFromIClipboard(const CString& data) const
|
||||||
{
|
{
|
||||||
return CUnicode::UTF8ToUTF16(data);
|
// convert and add nul terminator
|
||||||
|
return CUnicode::UTF8ToUTF16(data).append(sizeof(wchar_t), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
CString
|
CString
|
||||||
CMSWindowsClipboardUTF16Converter::doToIClipboard(const CString& data) const
|
CMSWindowsClipboardUTF16Converter::doToIClipboard(const CString& data) const
|
||||||
{
|
{
|
||||||
return CUnicode::UTF16ToUTF8(data);
|
// convert and strip nul terminator
|
||||||
|
CString dst = CUnicode::UTF16ToUTF8(data);
|
||||||
|
if (dst.size() > 0 && dst[dst.size() - 1] == '\0') {
|
||||||
|
dst.erase(dst.size() - 1);
|
||||||
|
}
|
||||||
|
return dst;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue