checkpoint. more UTF8 clipboard stuff.
This commit is contained in:
parent
16cc05d56b
commit
1f5cb6a96f
|
@ -36,6 +36,26 @@ decode32(const UInt8* n)
|
||||||
return c.n32;
|
return c.n32;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Clears the caller's error flag.  errors may be NULL, in which case
// the caller is not interested in error reporting and nothing happens.
static inline
void
resetError(bool* errors)
{
	if (errors != NULL) {
		*errors = false;
	}
}
|
||||||
|
|
||||||
|
// Raises the caller's error flag.  errors may be NULL, in which case
// the caller is not interested in error reporting and nothing happens.
static inline
void
setError(bool* errors)
{
	if (errors != NULL) {
		*errors = true;
	}
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// CUnicode
|
// CUnicode
|
||||||
//
|
//
|
||||||
|
@ -43,9 +63,25 @@ decode32(const UInt8* n)
|
||||||
UInt32 CUnicode::s_invalid = 0x0000ffff;
|
UInt32 CUnicode::s_invalid = 0x0000ffff;
|
||||||
UInt32 CUnicode::s_replacement = 0x0000fffd;
|
UInt32 CUnicode::s_replacement = 0x0000fffd;
|
||||||
|
|
||||||
CString
|
bool
|
||||||
CUnicode::UTF8ToUCS2(const CString& src)
|
CUnicode::isUTF8(const CString& src)
|
||||||
{
|
{
|
||||||
|
// convert and test each character
|
||||||
|
const UInt8* data = reinterpret_cast<const UInt8*>(src.c_str());
|
||||||
|
for (UInt32 n = src.size(); n > 0; ) {
|
||||||
|
if (fromUTF8(data, n) == s_invalid) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
CString
|
||||||
|
CUnicode::UTF8ToUCS2(const CString& src, bool* errors)
|
||||||
|
{
|
||||||
|
// default to success
|
||||||
|
resetError(errors);
|
||||||
|
|
||||||
// get size of input string and reserve some space in output.
|
// get size of input string and reserve some space in output.
|
||||||
// include UTF8's nul terminator.
|
// include UTF8's nul terminator.
|
||||||
UInt32 n = src.size() + 1;
|
UInt32 n = src.size() + 1;
|
||||||
|
@ -56,7 +92,11 @@ CUnicode::UTF8ToUCS2(const CString& src)
|
||||||
const UInt8* data = reinterpret_cast<const UInt8*>(src.c_str());
|
const UInt8* data = reinterpret_cast<const UInt8*>(src.c_str());
|
||||||
while (n > 0) {
|
while (n > 0) {
|
||||||
UInt32 c = fromUTF8(data, n);
|
UInt32 c = fromUTF8(data, n);
|
||||||
if (c == s_invalid || c >= 0x00010000) {
|
if (c == s_invalid) {
|
||||||
|
c = s_replacement;
|
||||||
|
}
|
||||||
|
else if (c >= 0x00010000) {
|
||||||
|
setError(errors);
|
||||||
c = s_replacement;
|
c = s_replacement;
|
||||||
}
|
}
|
||||||
UInt16 ucs2 = static_cast<UInt16>(c);
|
UInt16 ucs2 = static_cast<UInt16>(c);
|
||||||
|
@ -67,8 +107,11 @@ CUnicode::UTF8ToUCS2(const CString& src)
|
||||||
}
|
}
|
||||||
|
|
||||||
CString
|
CString
|
||||||
CUnicode::UTF8ToUCS4(const CString& src)
|
CUnicode::UTF8ToUCS4(const CString& src, bool* errors)
|
||||||
{
|
{
|
||||||
|
// default to success
|
||||||
|
resetError(errors);
|
||||||
|
|
||||||
// get size of input string and reserve some space in output.
|
// get size of input string and reserve some space in output.
|
||||||
// include UTF8's nul terminator.
|
// include UTF8's nul terminator.
|
||||||
UInt32 n = src.size() + 1;
|
UInt32 n = src.size() + 1;
|
||||||
|
@ -89,8 +132,11 @@ CUnicode::UTF8ToUCS4(const CString& src)
|
||||||
}
|
}
|
||||||
|
|
||||||
CString
|
CString
|
||||||
CUnicode::UTF8ToUTF16(const CString& src)
|
CUnicode::UTF8ToUTF16(const CString& src, bool* errors)
|
||||||
{
|
{
|
||||||
|
// default to success
|
||||||
|
resetError(errors);
|
||||||
|
|
||||||
// get size of input string and reserve some space in output.
|
// get size of input string and reserve some space in output.
|
||||||
// include UTF8's nul terminator.
|
// include UTF8's nul terminator.
|
||||||
UInt32 n = src.size() + 1;
|
UInt32 n = src.size() + 1;
|
||||||
|
@ -101,7 +147,11 @@ CUnicode::UTF8ToUTF16(const CString& src)
|
||||||
const UInt8* data = reinterpret_cast<const UInt8*>(src.c_str());
|
const UInt8* data = reinterpret_cast<const UInt8*>(src.c_str());
|
||||||
while (n > 0) {
|
while (n > 0) {
|
||||||
UInt32 c = fromUTF8(data, n);
|
UInt32 c = fromUTF8(data, n);
|
||||||
if (c == s_invalid || c >= 0x00110000) {
|
if (c == s_invalid) {
|
||||||
|
c = s_replacement;
|
||||||
|
}
|
||||||
|
else if (c >= 0x00110000) {
|
||||||
|
setError(errors);
|
||||||
c = s_replacement;
|
c = s_replacement;
|
||||||
}
|
}
|
||||||
if (c < 0x00010000) {
|
if (c < 0x00010000) {
|
||||||
|
@ -121,8 +171,11 @@ CUnicode::UTF8ToUTF16(const CString& src)
|
||||||
}
|
}
|
||||||
|
|
||||||
CString
|
CString
|
||||||
CUnicode::UTF8ToUTF32(const CString& src)
|
CUnicode::UTF8ToUTF32(const CString& src, bool* errors)
|
||||||
{
|
{
|
||||||
|
// default to success
|
||||||
|
resetError(errors);
|
||||||
|
|
||||||
// get size of input string and reserve some space in output.
|
// get size of input string and reserve some space in output.
|
||||||
// include UTF8's nul terminator.
|
// include UTF8's nul terminator.
|
||||||
UInt32 n = src.size() + 1;
|
UInt32 n = src.size() + 1;
|
||||||
|
@ -133,7 +186,11 @@ CUnicode::UTF8ToUTF32(const CString& src)
|
||||||
const UInt8* data = reinterpret_cast<const UInt8*>(src.c_str());
|
const UInt8* data = reinterpret_cast<const UInt8*>(src.c_str());
|
||||||
while (n > 0) {
|
while (n > 0) {
|
||||||
UInt32 c = fromUTF8(data, n);
|
UInt32 c = fromUTF8(data, n);
|
||||||
if (c == s_invalid || c >= 0x00110000) {
|
if (c == s_invalid) {
|
||||||
|
c = s_replacement;
|
||||||
|
}
|
||||||
|
else if (c >= 0x00110000) {
|
||||||
|
setError(errors);
|
||||||
c = s_replacement;
|
c = s_replacement;
|
||||||
}
|
}
|
||||||
dst.append(reinterpret_cast<const char*>(&c), 4);
|
dst.append(reinterpret_cast<const char*>(&c), 4);
|
||||||
|
@ -143,38 +200,13 @@ CUnicode::UTF8ToUTF32(const CString& src)
|
||||||
}
|
}
|
||||||
|
|
||||||
CString
|
CString
|
||||||
CUnicode::UCS2ToUTF8(const CString& src)
|
CUnicode::UTF8ToText(const CString& src, bool* errors)
|
||||||
{
|
{
|
||||||
UInt32 n = src.size() >> 1;
|
// default to success
|
||||||
return doUCS2ToUTF8(reinterpret_cast<const UInt8*>(src.data()), n);
|
resetError(errors);
|
||||||
}
|
|
||||||
|
|
||||||
CString
|
|
||||||
CUnicode::UCS4ToUTF8(const CString& src)
|
|
||||||
{
|
|
||||||
UInt32 n = src.size() >> 2;
|
|
||||||
return doUCS4ToUTF8(reinterpret_cast<const UInt8*>(src.data()), n);
|
|
||||||
}
|
|
||||||
|
|
||||||
CString
|
|
||||||
CUnicode::UTF16ToUTF8(const CString& src)
|
|
||||||
{
|
|
||||||
UInt32 n = src.size() >> 1;
|
|
||||||
return doUTF16ToUTF8(reinterpret_cast<const UInt8*>(src.data()), n);
|
|
||||||
}
|
|
||||||
|
|
||||||
CString
|
|
||||||
CUnicode::UTF32ToUTF8(const CString& src)
|
|
||||||
{
|
|
||||||
UInt32 n = src.size() >> 2;
|
|
||||||
return doUTF32ToUTF8(reinterpret_cast<const UInt8*>(src.data()), n);
|
|
||||||
}
|
|
||||||
|
|
||||||
CString
|
|
||||||
CUnicode::UTF8ToText(const CString& src)
|
|
||||||
{
|
|
||||||
// convert to wide char
|
// convert to wide char
|
||||||
wchar_t* tmp = UTF8ToWideChar(src);
|
wchar_t* tmp = UTF8ToWideChar(src, errors);
|
||||||
|
|
||||||
// get length of multibyte string
|
// get length of multibyte string
|
||||||
size_t len = 0;
|
size_t len = 0;
|
||||||
|
@ -185,6 +217,7 @@ CUnicode::UTF8ToText(const CString& src)
|
||||||
size_t mblen = wcrtomb(mbc, *scan, &state);
|
size_t mblen = wcrtomb(mbc, *scan, &state);
|
||||||
if (mblen == -1) {
|
if (mblen == -1) {
|
||||||
// unconvertable character
|
// unconvertable character
|
||||||
|
setError(errors);
|
||||||
len += 1;
|
len += 1;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -225,34 +258,116 @@ CUnicode::UTF8ToText(const CString& src)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Converts UCS-2 data held in src to UTF-8.  src is treated as a raw
// byte buffer of 2-byte characters; a trailing odd byte is ignored.
// errors, if not NULL, is handed to doUCS2ToUTF8() for error reporting.
CString
CUnicode::UCS2ToUTF8(const CString& src, bool* errors)
{
	// number of 2-byte characters in the buffer
	UInt32 n = static_cast<UInt32>(src.size() >> 1);
	return doUCS2ToUTF8(reinterpret_cast<const UInt8*>(src.data()), n, errors);
}
|
||||||
|
|
||||||
|
// Converts UCS-4 data held in src to UTF-8.  src is treated as a raw
// byte buffer of 4-byte characters; up to three trailing bytes that do
// not form a whole character are ignored.  errors, if not NULL, is
// handed to doUCS4ToUTF8() for error reporting.
CString
CUnicode::UCS4ToUTF8(const CString& src, bool* errors)
{
	// number of 4-byte characters in the buffer
	UInt32 n = static_cast<UInt32>(src.size() >> 2);
	return doUCS4ToUTF8(reinterpret_cast<const UInt8*>(src.data()), n, errors);
}
|
||||||
|
|
||||||
|
// Converts UTF-16 data held in src to UTF-8.  src is treated as a raw
// byte buffer of 2-byte code units; a trailing odd byte is ignored.
// errors, if not NULL, is handed to doUTF16ToUTF8() for error
// reporting.
CString
CUnicode::UTF16ToUTF8(const CString& src, bool* errors)
{
	// number of 2-byte code units in the buffer
	UInt32 n = static_cast<UInt32>(src.size() >> 1);
	return doUTF16ToUTF8(reinterpret_cast<const UInt8*>(src.data()), n, errors);
}
|
||||||
|
|
||||||
|
// Converts UTF-32 data held in src to UTF-8.  src is treated as a raw
// byte buffer of 4-byte characters; up to three trailing bytes that do
// not form a whole character are ignored.  errors, if not NULL, is
// handed to doUTF32ToUTF8() for error reporting.
CString
CUnicode::UTF32ToUTF8(const CString& src, bool* errors)
{
	// number of 4-byte characters in the buffer
	UInt32 n = static_cast<UInt32>(src.size() >> 2);
	return doUTF32ToUTF8(reinterpret_cast<const UInt8*>(src.data()), n, errors);
}
|
||||||
|
|
||||||
|
// Converts src from the current locale's multibyte encoding to UTF-8.
//
// src is decoded one character at a time with mbrtowc().  A byte that
// does not begin a valid character, or an incomplete character at the
// end of src, is replaced by the replacement character (U+FFFD) and
// counted as an error.  Decoding stops at the first nul byte in src.
//
// If errors is not NULL it is set to true when any character could not
// be decoded from src or encoded into UTF-8, and to false otherwise.
CString
CUnicode::textToUTF8(const CString& src, bool* errors)
{
	// mbrtowc() failure codes
	const size_t kIncomplete = static_cast<size_t>(-2);
	const size_t kInvalid    = static_cast<size_t>(-1);

	// default to success
	resetError(errors);

	// decoding problems are collected here and merged into *errors at
	// the end.  the wide-char-to-UTF-8 helpers reset the flag they are
	// given on entry, so writing decode errors straight to *errors
	// would lose them.
	bool decodeErrors = false;

	// get length of multibyte string
	UInt32 n   = static_cast<UInt32>(src.size());
	size_t len = 0;
	mbstate_t state;
	memset(&state, 0, sizeof(state));
	for (const char* scan = src.c_str(); n > 0 && *scan != 0; ) {
		const size_t mblen = mbrtowc(NULL, scan, n, &state);
		if (mblen == kIncomplete) {
			// incomplete last character.  convert to unknown character.
			decodeErrors = true;
			len += 1;
			n    = 0;
		}
		else if (mblen == kInvalid) {
			// invalid character.  count one unknown character and
			// start at the next byte.  the conversion state is
			// unspecified after an encoding error so start afresh.
			decodeErrors = true;
			memset(&state, 0, sizeof(state));
			len  += 1;
			scan += 1;
			n    -= 1;
		}
		else {
			// normal character
			len  += 1;
			scan += mblen;
			n    -= static_cast<UInt32>(mblen);
		}
	}
	memset(&state, 0, sizeof(state));

	// allocate wide character string
	wchar_t* wcs = new wchar_t[len + 1];

	// convert multibyte to wide char.  this pass must take exactly the
	// same branches as the counting pass so that it writes len
	// characters.
	n = static_cast<UInt32>(src.size());
	wchar_t* dst = wcs;
	for (const char* scan = src.c_str(); n > 0 && *scan != 0; ++dst) {
		const size_t mblen = mbrtowc(dst, scan, n, &state);
		if (mblen == kIncomplete) {
			// incomplete character.  convert to unknown character.
			*dst = static_cast<wchar_t>(s_replacement);
			n    = 0;
		}
		else if (mblen == kInvalid) {
			// invalid character.  emit one unknown character and
			// start at the next byte with a fresh conversion state.
			memset(&state, 0, sizeof(state));
			*dst  = static_cast<wchar_t>(s_replacement);
			scan += 1;
			n    -= 1;
		}
		else {
			// normal character
			scan += mblen;
			n    -= static_cast<UInt32>(mblen);
		}
	}
	*dst = L'\0';

	// convert to UTF8.  release the wide string even if the conversion
	// throws.
	bool encodeErrors = false;
	CString utf8;
	try {
		utf8 = wideCharToUTF8(wcs, &encodeErrors);
	}
	catch (...) {
		delete[] wcs;
		throw;
	}

	// clean up
	delete[] wcs;

	// report decoding and encoding problems together
	if (decodeErrors || encodeErrors) {
		setError(errors);
	}

	return utf8;
}
|
||||||
|
|
||||||
wchar_t*
|
wchar_t*
|
||||||
CUnicode::UTF8ToWideChar(const CString& src)
|
CUnicode::UTF8ToWideChar(const CString& src, bool* errors)
|
||||||
{
|
{
|
||||||
// convert to platform's wide character encoding.
|
// convert to platform's wide character encoding.
|
||||||
// note -- this must include a wide nul character (independent of
|
// note -- this must include a wide nul character (independent of
|
||||||
|
@ -272,21 +387,26 @@ CUnicode::UTF8ToWideChar(const CString& src)
|
||||||
}
|
}
|
||||||
|
|
||||||
CString
|
CString
|
||||||
CUnicode::wideCharToUTF8(const wchar_t* src)
|
CUnicode::wideCharToUTF8(const wchar_t* src, bool* errors)
|
||||||
{
|
{
|
||||||
// convert from platform's wide character encoding.
|
// convert from platform's wide character encoding.
|
||||||
// note -- this must include a wide nul character (independent of
|
// note -- this must include a wide nul character (independent of
|
||||||
// the CString's nul character).
|
// the CString's nul character).
|
||||||
#if WINDOWS_LIKE
|
#if WINDOWS_LIKE
|
||||||
return doUCS16ToUTF8(reinterpret_cast<const UInt8*>(src), wcslen(src));
|
return doUCS16ToUTF8(reinterpret_cast<const UInt8*>(src),
|
||||||
|
wcslen(src), errors);
|
||||||
#elif UNIX_LIKE
|
#elif UNIX_LIKE
|
||||||
return doUCS4ToUTF8(reinterpret_cast<const UInt8*>(src), wcslen(src));
|
return doUCS4ToUTF8(reinterpret_cast<const UInt8*>(src),
|
||||||
|
wcslen(src), errors);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
CString
|
CString
|
||||||
CUnicode::doUCS2ToUTF8(const UInt8* data, UInt32 n)
|
CUnicode::doUCS2ToUTF8(const UInt8* data, UInt32 n, bool* errors)
|
||||||
{
|
{
|
||||||
|
// default to success
|
||||||
|
resetError(errors);
|
||||||
|
|
||||||
// make some space
|
// make some space
|
||||||
CString dst;
|
CString dst;
|
||||||
dst.reserve(n);
|
dst.reserve(n);
|
||||||
|
@ -294,7 +414,7 @@ CUnicode::doUCS2ToUTF8(const UInt8* data, UInt32 n)
|
||||||
// convert each character
|
// convert each character
|
||||||
for (; n > 0; data += 2, --n) {
|
for (; n > 0; data += 2, --n) {
|
||||||
UInt32 c = decode16(data);
|
UInt32 c = decode16(data);
|
||||||
toUTF8(dst, c);
|
toUTF8(dst, c, errors);
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove extra trailing nul
|
// remove extra trailing nul
|
||||||
|
@ -306,8 +426,11 @@ CUnicode::doUCS2ToUTF8(const UInt8* data, UInt32 n)
|
||||||
}
|
}
|
||||||
|
|
||||||
CString
|
CString
|
||||||
CUnicode::doUCS4ToUTF8(const UInt8* data, UInt32 n)
|
CUnicode::doUCS4ToUTF8(const UInt8* data, UInt32 n, bool* errors)
|
||||||
{
|
{
|
||||||
|
// default to success
|
||||||
|
resetError(errors);
|
||||||
|
|
||||||
// make some space
|
// make some space
|
||||||
CString dst;
|
CString dst;
|
||||||
dst.reserve(n);
|
dst.reserve(n);
|
||||||
|
@ -315,7 +438,7 @@ CUnicode::doUCS4ToUTF8(const UInt8* data, UInt32 n)
|
||||||
// convert each character
|
// convert each character
|
||||||
for (; n > 0; data += 4, --n) {
|
for (; n > 0; data += 4, --n) {
|
||||||
UInt32 c = decode32(data);
|
UInt32 c = decode32(data);
|
||||||
toUTF8(dst, c);
|
toUTF8(dst, c, errors);
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove extra trailing nul
|
// remove extra trailing nul
|
||||||
|
@ -327,8 +450,11 @@ CUnicode::doUCS4ToUTF8(const UInt8* data, UInt32 n)
|
||||||
}
|
}
|
||||||
|
|
||||||
CString
|
CString
|
||||||
CUnicode::doUTF16ToUTF8(const UInt8* data, UInt32 n)
|
CUnicode::doUTF16ToUTF8(const UInt8* data, UInt32 n, bool* errors)
|
||||||
{
|
{
|
||||||
|
// default to success
|
||||||
|
resetError(errors);
|
||||||
|
|
||||||
// make some space
|
// make some space
|
||||||
CString dst;
|
CString dst;
|
||||||
dst.reserve(n);
|
dst.reserve(n);
|
||||||
|
@ -337,11 +463,12 @@ CUnicode::doUTF16ToUTF8(const UInt8* data, UInt32 n)
|
||||||
for (; n > 0; data += 2, --n) {
|
for (; n > 0; data += 2, --n) {
|
||||||
UInt32 c = decode16(data);
|
UInt32 c = decode16(data);
|
||||||
if (c < 0x0000d800 || c > 0x0000dfff) {
|
if (c < 0x0000d800 || c > 0x0000dfff) {
|
||||||
toUTF8(dst, c);
|
toUTF8(dst, c, errors);
|
||||||
}
|
}
|
||||||
else if (n == 1) {
|
else if (n == 1) {
|
||||||
// error -- missing second word
|
// error -- missing second word
|
||||||
toUTF8(dst, s_replacement);
|
setError(errors);
|
||||||
|
toUTF8(dst, s_replacement, NULL);
|
||||||
}
|
}
|
||||||
else if (c >= 0x0000d800 && c <= 0x0000dbff) {
|
else if (c >= 0x0000d800 && c <= 0x0000dbff) {
|
||||||
UInt32 c2 = decode16(data);
|
UInt32 c2 = decode16(data);
|
||||||
|
@ -349,16 +476,18 @@ CUnicode::doUTF16ToUTF8(const UInt8* data, UInt32 n)
|
||||||
--n;
|
--n;
|
||||||
if (c2 < 0x0000dc00 || c2 > 0x0000dfff) {
|
if (c2 < 0x0000dc00 || c2 > 0x0000dfff) {
|
||||||
// error -- [d800,dbff] not followed by [dc00,dfff]
|
// error -- [d800,dbff] not followed by [dc00,dfff]
|
||||||
toUTF8(dst, s_replacement);
|
setError(errors);
|
||||||
|
toUTF8(dst, s_replacement, NULL);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
c = (((c - 0x0000d800) << 10) | (c2 - 0x0000dc00)) + 0x00010000;
|
c = (((c - 0x0000d800) << 10) | (c2 - 0x0000dc00)) + 0x00010000;
|
||||||
toUTF8(dst, c);
|
toUTF8(dst, c, errors);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// error -- [dc00,dfff] without leading [d800,dbff]
|
// error -- [dc00,dfff] without leading [d800,dbff]
|
||||||
toUTF8(dst, s_replacement);
|
setError(errors);
|
||||||
|
toUTF8(dst, s_replacement, NULL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -371,8 +500,11 @@ CUnicode::doUTF16ToUTF8(const UInt8* data, UInt32 n)
|
||||||
}
|
}
|
||||||
|
|
||||||
CString
|
CString
|
||||||
CUnicode::doUTF32ToUTF8(const UInt8* data, UInt32 n)
|
CUnicode::doUTF32ToUTF8(const UInt8* data, UInt32 n, bool* errors)
|
||||||
{
|
{
|
||||||
|
// default to success
|
||||||
|
resetError(errors);
|
||||||
|
|
||||||
// make some space
|
// make some space
|
||||||
CString dst;
|
CString dst;
|
||||||
dst.reserve(n);
|
dst.reserve(n);
|
||||||
|
@ -381,9 +513,10 @@ CUnicode::doUTF32ToUTF8(const UInt8* data, UInt32 n)
|
||||||
for (; n > 0; data += 4, --n) {
|
for (; n > 0; data += 4, --n) {
|
||||||
UInt32 c = decode32(data);
|
UInt32 c = decode32(data);
|
||||||
if (c >= 0x00110000) {
|
if (c >= 0x00110000) {
|
||||||
|
setError(errors);
|
||||||
c = s_replacement;
|
c = s_replacement;
|
||||||
}
|
}
|
||||||
toUTF8(dst, c);
|
toUTF8(dst, c, errors);
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove extra trailing nul
|
// remove extra trailing nul
|
||||||
|
@ -571,12 +704,13 @@ CUnicode::fromUTF8(const UInt8*& data, UInt32& n)
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
CUnicode::toUTF8(CString& dst, UInt32 c)
|
CUnicode::toUTF8(CString& dst, UInt32 c, bool* errors)
|
||||||
{
|
{
|
||||||
UInt8 data[6];
|
UInt8 data[6];
|
||||||
|
|
||||||
// handle characters outside the valid range
|
// handle characters outside the valid range
|
||||||
if (c >= 0x80000000) {
|
if ((c >= 0x0000d800 && c <= 0x0000dfff) || c >= 0x80000000) {
|
||||||
|
setError(errors);
|
||||||
c = s_replacement;
|
c = s_replacement;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -7,39 +7,50 @@
|
||||||
|
|
||||||
class CUnicode {
|
class CUnicode {
|
||||||
public:
|
public:
|
||||||
static CString UTF8ToUCS2(const CString&);
|
// returns true iff the string contains a valid sequence of UTF-8
|
||||||
static CString UTF8ToUCS4(const CString&);
|
// encoded characters.
|
||||||
static CString UTF8ToUTF16(const CString&);
|
static bool isUTF8(const CString&);
|
||||||
static CString UTF8ToUTF32(const CString&);
|
|
||||||
|
|
||||||
static CString UCS2ToUTF8(const CString&);
|
// convert from UTF-8 encoding to other encodings. if errors is
|
||||||
static CString UCS4ToUTF8(const CString&);
|
// not NULL then it gets true if any characters could not be
|
||||||
static CString UTF16ToUTF8(const CString&);
|
// encoded in the target encoding and false otherwise. note
|
||||||
static CString UTF32ToUTF8(const CString&);
|
// that decoding errors do not set errors to true. UTF8ToText()
|
||||||
|
// converts to the current locale's (multibyte) encoding.
|
||||||
|
static CString UTF8ToUCS2(const CString&, bool* errors = NULL);
|
||||||
|
static CString UTF8ToUCS4(const CString&, bool* errors = NULL);
|
||||||
|
static CString UTF8ToUTF16(const CString&, bool* errors = NULL);
|
||||||
|
static CString UTF8ToUTF32(const CString&, bool* errors = NULL);
|
||||||
|
static CString UTF8ToText(const CString&, bool* errors = NULL);
|
||||||
|
|
||||||
// convert UTF-8 to/from the current locale's encoding
|
// convert from some encoding to UTF-8. if errors is not NULL
|
||||||
static CString UTF8ToText(const CString&);
|
// then it gets true if any characters could not be decoded and
|
||||||
static CString textToUTF8(const CString&);
|
// false otherwise. textToUTF8() converts from the current
|
||||||
|
// locale's (multibyte) encoding.
|
||||||
|
static CString UCS2ToUTF8(const CString&, bool* errors = NULL);
|
||||||
|
static CString UCS4ToUTF8(const CString&, bool* errors = NULL);
|
||||||
|
static CString UTF16ToUTF8(const CString&, bool* errors = NULL);
|
||||||
|
static CString UTF32ToUTF8(const CString&, bool* errors = NULL);
|
||||||
|
static CString textToUTF8(const CString&, bool* errors = NULL);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// convert UTF8 to nul terminated wchar_t string (using whatever
|
// convert UTF8 to nul terminated wchar_t string (using whatever
|
||||||
// encoding is native to the platform). caller must delete[]
|
// encoding is native to the platform). caller must delete[]
|
||||||
// the returned string.
|
// the returned string.
|
||||||
static wchar_t* UTF8ToWideChar(const CString&);
|
static wchar_t* UTF8ToWideChar(const CString&, bool* errors);
|
||||||
|
|
||||||
// convert nul terminated wchar_t string (in platform's native
|
// convert nul terminated wchar_t string (in platform's native
|
||||||
// encoding) to UTF8.
|
// encoding) to UTF8.
|
||||||
static CString wideCharToUTF8(const wchar_t*);
|
static CString wideCharToUTF8(const wchar_t*, bool* errors);
|
||||||
|
|
||||||
// internal conversion to UTF8
|
// internal conversion to UTF8
|
||||||
static CString doUCS2ToUTF8(const UInt8* src, UInt32 n);
|
static CString doUCS2ToUTF8(const UInt8* src, UInt32 n, bool* errors);
|
||||||
static CString doUCS4ToUTF8(const UInt8* src, UInt32 n);
|
static CString doUCS4ToUTF8(const UInt8* src, UInt32 n, bool* errors);
|
||||||
static CString doUTF16ToUTF8(const UInt8* src, UInt32 n);
|
static CString doUTF16ToUTF8(const UInt8* src, UInt32 n, bool* errors);
|
||||||
static CString doUTF32ToUTF8(const UInt8* src, UInt32 n);
|
static CString doUTF32ToUTF8(const UInt8* src, UInt32 n, bool* errors);
|
||||||
|
|
||||||
// convert characters to/from UTF8
|
// convert characters to/from UTF8
|
||||||
static UInt32 fromUTF8(const UInt8*& src, UInt32& size);
|
static UInt32 fromUTF8(const UInt8*& src, UInt32& size);
|
||||||
static void toUTF8(CString& dst, UInt32 c);
|
static void toUTF8(CString& dst, UInt32 c, bool* errors);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static UInt32 s_invalid;
|
static UInt32 s_invalid;
|
||||||
|
|
|
@ -151,22 +151,14 @@ CXWindowsClipboard::addSimpleRequest(Window requestor,
|
||||||
type = getTimestampData(data, &format);
|
type = getTimestampData(data, &format);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
char* name = XGetAtomName(m_display, target);
|
|
||||||
log((CLOG_INFO "request target: %d %s", target, name));
|
|
||||||
XFree(name);
|
|
||||||
IXWindowsClipboardConverter* converter = getConverter(target);
|
IXWindowsClipboardConverter* converter = getConverter(target);
|
||||||
if (converter != NULL) {
|
if (converter != NULL) {
|
||||||
log((CLOG_INFO "found converter"));
|
|
||||||
IClipboard::EFormat clipboardFormat = converter->getFormat();
|
IClipboard::EFormat clipboardFormat = converter->getFormat();
|
||||||
log((CLOG_INFO "clipboard format: %d", clipboardFormat));
|
|
||||||
if (m_added[clipboardFormat]) {
|
if (m_added[clipboardFormat]) {
|
||||||
log((CLOG_INFO "added"));
|
|
||||||
try {
|
try {
|
||||||
data = converter->fromIClipboard(m_data[clipboardFormat]);
|
data = converter->fromIClipboard(m_data[clipboardFormat]);
|
||||||
format = converter->getDataSize();
|
format = converter->getDataSize();
|
||||||
type = converter->getAtom();
|
type = converter->getAtom();
|
||||||
log((CLOG_INFO " src: (%d) %s", m_data[clipboardFormat].size(), m_data[clipboardFormat].c_str()));
|
|
||||||
log((CLOG_INFO " dst: (%d) %s", data.size(), data.c_str()));
|
|
||||||
}
|
}
|
||||||
catch (...) {
|
catch (...) {
|
||||||
// ignore -- cannot convert
|
// ignore -- cannot convert
|
||||||
|
@ -535,17 +527,10 @@ CXWindowsClipboard::icccmFillCache()
|
||||||
|
|
||||||
// add to clipboard and note we've done it
|
// add to clipboard and note we've done it
|
||||||
IClipboard::EFormat format = converter->getFormat();
|
IClipboard::EFormat format = converter->getFormat();
|
||||||
try {
|
|
||||||
m_data[format] = converter->toIClipboard(targetData);
|
m_data[format] = converter->toIClipboard(targetData);
|
||||||
if (!m_data[format].empty()) {
|
|
||||||
m_added[format] = true;
|
m_added[format] = true;
|
||||||
log((CLOG_DEBUG " added format %d for target %d", converter->getFormat(), target));
|
log((CLOG_DEBUG " added format %d for target %d", converter->getFormat(), target));
|
||||||
}
|
}
|
||||||
}
|
|
||||||
catch (...) {
|
|
||||||
// ignore -- could not convert data
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
|
@ -799,17 +784,10 @@ CXWindowsClipboard::motifFillCache()
|
||||||
|
|
||||||
// add to clipboard and note we've done it
|
// add to clipboard and note we've done it
|
||||||
IClipboard::EFormat format = converter->getFormat();
|
IClipboard::EFormat format = converter->getFormat();
|
||||||
try {
|
|
||||||
m_data[format] = converter->toIClipboard(targetData);
|
m_data[format] = converter->toIClipboard(targetData);
|
||||||
if (!m_data[format].empty()) {
|
|
||||||
m_added[format] = true;
|
m_added[format] = true;
|
||||||
log((CLOG_DEBUG " added format %d for target %d", converter->getFormat(), target));
|
log((CLOG_DEBUG " added format %d for target %d", converter->getFormat(), target));
|
||||||
}
|
}
|
||||||
}
|
|
||||||
catch (...) {
|
|
||||||
// ignore -- could not convert data
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
IClipboard::Time
|
IClipboard::Time
|
||||||
|
|
|
@ -44,5 +44,17 @@ CXWindowsClipboardTextConverter::fromIClipboard(const CString& data) const
|
||||||
// Converts clipboard text received from X (in the current locale's
// encoding) to the UTF-8 form stored in the clipboard.  If the
// conversion reports errors but the raw data is itself well-formed
// UTF-8, the raw data is returned unchanged.
CString
CXWindowsClipboardTextConverter::toIClipboard(const CString& data) const
{
	// convert to UTF-8.  the flag starts out false so the test below
	// never reads an indeterminate value.
	bool errors = false;
	CString utf8 = CUnicode::textToUTF8(data, &errors);

	// if there were decoding errors then, to support old applications
	// that don't understand UTF-8 but can report the exact binary
	// UTF-8 representation, see if the data appears to be UTF-8.  if
	// so then use it as is.
	if (errors && CUnicode::isUTF8(data)) {
		return data;
	}

	return utf8;
}
|
||||||
|
|
Loading…
Reference in New Issue