Removed use of mbrtowc, wcrtomb, and mbsinit. Many platforms
didn't support them, and the emulated versions were just as good except for a performance problem: the mutex was locked and unlocked excessively. This commit therefore also changes IArchString to provide string conversion rather than character conversion, so the mutex is locked once per string rather than once per character.
This commit is contained in:
parent
cc577dce7c
commit
54b3884eba
|
@ -142,7 +142,6 @@ AC_FUNC_STRFTIME
|
|||
AC_CHECK_FUNCS(gmtime_r)
|
||||
ACX_CHECK_GETPWUID_R
|
||||
AC_CHECK_FUNCS(vsnprintf)
|
||||
AC_CHECK_FUNCS(wcrtomb mbrtowc mbsinit)
|
||||
AC_FUNC_SELECT_ARGTYPES
|
||||
ACX_CHECK_POLL
|
||||
dnl use AC_REPLACE_FUNCS() for stuff in string.h
|
||||
|
|
|
@ -561,40 +561,16 @@ CArch::vsnprintf(char* str, int size, const char* fmt, va_list ap)
|
|||
return m_string->vsnprintf(str, size, fmt, ap);
|
||||
}
|
||||
|
||||
CArchMBState
|
||||
CArch::newMBState()
|
||||
int
|
||||
CArch::convStringMBToWC(wchar_t* dst, const char* src, UInt32 n, bool* errors)
|
||||
{
|
||||
return m_string->newMBState();
|
||||
}
|
||||
|
||||
void
|
||||
CArch::closeMBState(CArchMBState state)
|
||||
{
|
||||
m_string->closeMBState(state);
|
||||
}
|
||||
|
||||
void
|
||||
CArch::initMBState(CArchMBState state)
|
||||
{
|
||||
m_string->initMBState(state);
|
||||
}
|
||||
|
||||
bool
|
||||
CArch::isInitMBState(CArchMBState state)
|
||||
{
|
||||
return m_string->isInitMBState(state);
|
||||
return m_string->convStringMBToWC(dst, src, n, errors);
|
||||
}
|
||||
|
||||
int
|
||||
CArch::convMBToWC(wchar_t* dst, const char* src, int n, CArchMBState state)
|
||||
CArch::convStringWCToMB(char* dst, const wchar_t* src, UInt32 n, bool* errors)
|
||||
{
|
||||
return m_string->convMBToWC(dst, src, n, state);
|
||||
}
|
||||
|
||||
int
|
||||
CArch::convWCToMB(char* dst, wchar_t src, CArchMBState state)
|
||||
{
|
||||
return m_string->convWCToMB(dst, src, state);
|
||||
return m_string->convStringWCToMB(dst, src, n, errors);
|
||||
}
|
||||
|
||||
IArchString::EWideCharEncoding
|
||||
|
|
|
@ -159,12 +159,10 @@ public:
|
|||
// IArchString overrides
|
||||
virtual int vsnprintf(char* str,
|
||||
int size, const char* fmt, va_list ap);
|
||||
virtual CArchMBState newMBState();
|
||||
virtual void closeMBState(CArchMBState);
|
||||
virtual void initMBState(CArchMBState);
|
||||
virtual bool isInitMBState(CArchMBState);
|
||||
virtual int convMBToWC(wchar_t*, const char*, int, CArchMBState);
|
||||
virtual int convWCToMB(char*, wchar_t, CArchMBState);
|
||||
virtual int convStringMBToWC(wchar_t*,
|
||||
const char*, UInt32 n, bool* errors);
|
||||
virtual int convStringWCToMB(char*,
|
||||
const wchar_t*, UInt32 n, bool* errors);
|
||||
virtual EWideCharEncoding
|
||||
getWideCharEncoding();
|
||||
|
||||
|
|
|
@ -15,22 +15,11 @@
|
|||
#include "CArchStringUnix.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#include "CMultibyte.cpp"
|
||||
|
||||
//
|
||||
// CArchStringUnix
|
||||
//
|
||||
|
||||
CArchStringUnix::CArchStringUnix()
|
||||
{
|
||||
initMB();
|
||||
}
|
||||
|
||||
CArchStringUnix::~CArchStringUnix()
|
||||
{
|
||||
cleanMB();
|
||||
}
|
||||
|
||||
#include "CMultibyte.cpp"
|
||||
#include "vsnprintf.cpp"
|
||||
|
||||
IArchString::EWideCharEncoding
|
||||
|
|
|
@ -28,12 +28,10 @@ public:
|
|||
// IArchString overrides
|
||||
virtual int vsnprintf(char* str,
|
||||
int size, const char* fmt, va_list ap);
|
||||
virtual CArchMBState newMBState();
|
||||
virtual void closeMBState(CArchMBState);
|
||||
virtual void initMBState(CArchMBState);
|
||||
virtual bool isInitMBState(CArchMBState);
|
||||
virtual int convMBToWC(wchar_t*, const char*, int, CArchMBState);
|
||||
virtual int convWCToMB(char*, wchar_t, CArchMBState);
|
||||
virtual int convStringMBToWC(wchar_t*,
|
||||
const char*, UInt32 n, bool* errors);
|
||||
virtual int convStringWCToMB(char*,
|
||||
const wchar_t*, UInt32 n, bool* errors);
|
||||
virtual EWideCharEncoding
|
||||
getWideCharEncoding();
|
||||
};
|
||||
|
|
|
@ -18,22 +18,11 @@
|
|||
#include <windows.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "CMultibyte.cpp"
|
||||
|
||||
//
|
||||
// CArchStringWindows
|
||||
//
|
||||
|
||||
CArchStringWindows::CArchStringWindows()
|
||||
{
|
||||
initMB();
|
||||
}
|
||||
|
||||
CArchStringWindows::~CArchStringWindows()
|
||||
{
|
||||
cleanMB();
|
||||
}
|
||||
|
||||
#include "CMultibyte.cpp"
|
||||
#define HAVE_VSNPRINTF 1
|
||||
#define ARCH_VSNPRINTF _vsnprintf
|
||||
#include "vsnprintf.cpp"
|
||||
|
|
|
@ -28,12 +28,10 @@ public:
|
|||
// IArchString overrides
|
||||
virtual int vsnprintf(char* str,
|
||||
int size, const char* fmt, va_list ap);
|
||||
virtual CArchMBState newMBState();
|
||||
virtual void closeMBState(CArchMBState);
|
||||
virtual void initMBState(CArchMBState);
|
||||
virtual bool isInitMBState(CArchMBState);
|
||||
virtual int convMBToWC(wchar_t*, const char*, int, CArchMBState);
|
||||
virtual int convWCToMB(char*, wchar_t, CArchMBState);
|
||||
virtual int convStringMBToWC(wchar_t*,
|
||||
const char*, UInt32 n, bool* errors);
|
||||
virtual int convStringWCToMB(char*,
|
||||
const wchar_t*, UInt32 n, bool* errors);
|
||||
virtual EWideCharEncoding
|
||||
getWideCharEncoding();
|
||||
};
|
||||
|
|
|
@ -16,25 +16,192 @@
|
|||
#define CMULTIBYTE_H
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#if (HAVE_MBSINIT && HAVE_MBRTOWC && HAVE_WCRTOMB) || defined(_MSC_VER)
|
||||
#include "CMultibyteOS.cpp"
|
||||
#include "CArch.h"
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
#if HAVE_WCHAR_H
|
||||
# include <wchar.h>
|
||||
#elif __APPLE__
|
||||
// wtf? Darwin puts mbtowc() et al. in stdlib
|
||||
# include <stdlib.h>
|
||||
#else
|
||||
#include "CMultibyteEmu.cpp"
|
||||
#endif
|
||||
// platform apparently has no wchar_t support. provide dummy
|
||||
// implementations. hopefully at least the C++ compiler has
|
||||
// a built-in wchar_t type.
|
||||
|
||||
CArchMBState
|
||||
ARCH_STRING::newMBState()
|
||||
static inline
|
||||
int
|
||||
mbtowc(wchar_t* dst, const char* src, int n)
|
||||
{
|
||||
CArchMBState state = new CArchMBStateImpl;
|
||||
initMBState(state);
|
||||
return state;
|
||||
*dst = static_cast<wchar_t>(*src);
|
||||
return 1;
|
||||
}
|
||||
|
||||
void
|
||||
ARCH_STRING::closeMBState(CArchMBState state)
|
||||
static inline
|
||||
int
|
||||
wctomb(char* dst, wchar_t src)
|
||||
{
|
||||
delete state;
|
||||
*dst = static_cast<char>(src);
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//
|
||||
// use C library non-reentrant multibyte conversion with mutex
|
||||
//
|
||||
|
||||
static CArchMutex s_mutex = NULL;
|
||||
|
||||
// Constructor: creates the mutex that the string conversion routines in
// this file lock around their calls to wctomb()/mbtowc().
// NOTE(review): s_mutex is a file-level static, so constructing a second
// instance would overwrite the handle stored here -- confirm that only one
// instance is ever created.
ARCH_STRING::ARCH_STRING()
|
||||
{
|
||||
s_mutex = ARCH->newMutex();
|
||||
}
|
||||
|
||||
// Destructor: closes the conversion mutex and resets the file-level handle
// to NULL so a stale value is not left behind.
ARCH_STRING::~ARCH_STRING()
|
||||
{
|
||||
ARCH->closeMutex(s_mutex);
|
||||
s_mutex = NULL;
|
||||
}
|
||||
|
||||
int
|
||||
ARCH_STRING::convStringWCToMB(char* dst,
|
||||
const wchar_t* src, UInt32 n, bool* errors)
|
||||
{
|
||||
int len = 0;
|
||||
|
||||
bool dummyErrors;
|
||||
if (errors == NULL) {
|
||||
errors = &dummyErrors;
|
||||
}
|
||||
|
||||
ARCH->lockMutex(s_mutex);
|
||||
if (dst == NULL) {
|
||||
char dummy[MB_LEN_MAX];
|
||||
for (const wchar_t* scan = src; n > 0; ++scan, --n) {
|
||||
int mblen = wctomb(dummy, *scan);
|
||||
if (mblen == -1) {
|
||||
*errors = true;
|
||||
mblen = 1;
|
||||
}
|
||||
len += mblen;
|
||||
}
|
||||
int mblen = wctomb(NULL, L'\0');
|
||||
if (mblen != -1) {
|
||||
len += mblen - 1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
char* dst0 = dst;
|
||||
for (const wchar_t* scan = src; n > 0; ++scan, --n) {
|
||||
int mblen = wctomb(dst, *scan);
|
||||
if (mblen == -1) {
|
||||
*errors = true;
|
||||
*dst++ = '?';
|
||||
}
|
||||
else {
|
||||
dst += mblen;
|
||||
}
|
||||
}
|
||||
int mblen = wctomb(dst, L'\0');
|
||||
if (mblen != -1) {
|
||||
// don't include nul terminator
|
||||
dst += mblen - 1;
|
||||
}
|
||||
len = (int)(dst - dst0);
|
||||
}
|
||||
ARCH->unlockMutex(s_mutex);
|
||||
|
||||
return (ssize_t)len;
|
||||
}
|
||||
|
||||
int
|
||||
ARCH_STRING::convStringMBToWC(wchar_t* dst,
|
||||
const char* src, UInt32 n, bool* errors)
|
||||
{
|
||||
int len = 0;
|
||||
wchar_t dummy;
|
||||
|
||||
bool dummyErrors;
|
||||
if (errors == NULL) {
|
||||
errors = &dummyErrors;
|
||||
}
|
||||
|
||||
ARCH->lockMutex(s_mutex);
|
||||
if (dst == NULL) {
|
||||
for (const char* scan = src; n > 0; ) {
|
||||
int mblen = mbtowc(&dummy, scan, n);
|
||||
switch (mblen) {
|
||||
case -2:
|
||||
// incomplete last character. convert to unknown character.
|
||||
*errors = true;
|
||||
len += 1;
|
||||
n = 0;
|
||||
break;
|
||||
|
||||
case -1:
|
||||
// invalid character. count one unknown character and
|
||||
// start at the next byte.
|
||||
*errors = true;
|
||||
len += 1;
|
||||
scan += 1;
|
||||
n -= 1;
|
||||
break;
|
||||
|
||||
case 0:
|
||||
len += 1;
|
||||
scan += 1;
|
||||
n -= 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
// normal character
|
||||
len += 1;
|
||||
scan += mblen;
|
||||
n -= mblen;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
wchar_t* dst0 = dst;
|
||||
for (const char* scan = src; n > 0; ++dst) {
|
||||
int mblen = mbtowc(dst, scan, n);
|
||||
switch (mblen) {
|
||||
case -2:
|
||||
// incomplete character. convert to unknown character.
|
||||
*errors = true;
|
||||
*dst = (wchar_t)0xfffd;
|
||||
n = 0;
|
||||
break;
|
||||
|
||||
case -1:
|
||||
// invalid character. count one unknown character and
|
||||
// start at the next byte.
|
||||
*errors = true;
|
||||
*dst = (wchar_t)0xfffd;
|
||||
scan += 1;
|
||||
n -= 1;
|
||||
break;
|
||||
|
||||
case 0:
|
||||
*dst = (wchar_t)0x0000;
|
||||
scan += 1;
|
||||
n -= 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
// normal character
|
||||
scan += mblen;
|
||||
n -= mblen;
|
||||
break;
|
||||
}
|
||||
}
|
||||
len = (int)(dst - dst0);
|
||||
}
|
||||
ARCH->unlockMutex(s_mutex);
|
||||
|
||||
return (ssize_t)len;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,114 +0,0 @@
|
|||
/*
|
||||
* synergy -- mouse and keyboard sharing utility
|
||||
* Copyright (C) 2002 Chris Schoeneman
|
||||
*
|
||||
* This package is free software you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* found in the file COPYING that should have accompanied this file.
|
||||
*
|
||||
* This package is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include "CArch.h"
|
||||
#include <string.h>
|
||||
#if HAVE_WCHAR_H
|
||||
# include <wchar.h>
|
||||
#elif __APPLE__
|
||||
// wtf? Darwin puts mbtowc() et al. in stdlib
|
||||
# include <stdlib.h>
|
||||
#else
|
||||
// platform apparently has no wchar_t support. provide dummy
|
||||
// implementations. hopefully at least the C++ compiler has
|
||||
// a built-in wchar_t type.
|
||||
# undef HAVE_MBSINIT
|
||||
|
||||
static inline
|
||||
int
|
||||
mbtowc(wchar_t* dst, const char* src, int n)
|
||||
{
|
||||
*dst = static_cast<wchar_t>(*src);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline
|
||||
int
|
||||
wctomb(char* dst, wchar_t src)
|
||||
{
|
||||
*dst = static_cast<char>(src);
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
class CArchMBStateImpl {
|
||||
public:
|
||||
#if HAVE_MBSINIT
|
||||
mbstate_t m_mbstate;
|
||||
#else
|
||||
int m_mbstate; // dummy data
|
||||
#endif
|
||||
};
|
||||
|
||||
//
|
||||
// use C library non-reentrant multibyte conversion with mutex
|
||||
//
|
||||
|
||||
static CArchMutex s_mutex;
|
||||
|
||||
static
|
||||
void
|
||||
initMB()
|
||||
{
|
||||
s_mutex = ARCH->newMutex();
|
||||
}
|
||||
|
||||
static
|
||||
void
|
||||
cleanMB()
|
||||
{
|
||||
ARCH->closeMutex(s_mutex);
|
||||
}
|
||||
|
||||
void
|
||||
ARCH_STRING::initMBState(CArchMBState state)
|
||||
{
|
||||
memset(&state->m_mbstate, 0, sizeof(state->m_mbstate));
|
||||
}
|
||||
|
||||
bool
|
||||
ARCH_STRING::isInitMBState(CArchMBState state)
|
||||
{
|
||||
// if we're using this file mbsinit() probably doesn't exist
|
||||
// but use it if it does
|
||||
#if HAVE_MBSINIT
|
||||
return (mbsinit(&state->m_mbstate) != 0);
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
int
|
||||
ARCH_STRING::convMBToWC(wchar_t* dst, const char* src, int n, CArchMBState)
|
||||
{
|
||||
wchar_t dummy;
|
||||
ARCH->lockMutex(s_mutex);
|
||||
int result = mbtowc(dst != NULL ? dst : &dummy, src, n);
|
||||
ARCH->unlockMutex(s_mutex);
|
||||
if (result < 0)
|
||||
return -1;
|
||||
else
|
||||
return result;
|
||||
}
|
||||
|
||||
int
|
||||
ARCH_STRING::convWCToMB(char* dst, wchar_t src, CArchMBState)
|
||||
{
|
||||
char dummy[MB_LEN_MAX];
|
||||
ARCH->lockMutex(s_mutex);
|
||||
int n = wctomb(dst != NULL ? dst : dummy, src);
|
||||
ARCH->unlockMutex(s_mutex);
|
||||
return n;
|
||||
}
|
|
@ -1,69 +0,0 @@
|
|||
/*
|
||||
* synergy -- mouse and keyboard sharing utility
|
||||
* Copyright (C) 2002 Chris Schoeneman
|
||||
*
|
||||
* This package is free software you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* found in the file COPYING that should have accompanied this file.
|
||||
*
|
||||
* This package is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <cwchar>
|
||||
#include <limits.h>
|
||||
|
||||
class CArchMBStateImpl {
|
||||
public:
|
||||
mbstate_t m_mbstate;
|
||||
};
|
||||
|
||||
//
|
||||
// use C library reentrant multibyte conversion
|
||||
//
|
||||
|
||||
static
|
||||
void
|
||||
initMB()
|
||||
{
|
||||
// do nothing
|
||||
}
|
||||
|
||||
static
|
||||
void
|
||||
cleanMB()
|
||||
{
|
||||
// do nothing
|
||||
}
|
||||
|
||||
void
|
||||
ARCH_STRING::initMBState(CArchMBState state)
|
||||
{
|
||||
memset(&state->m_mbstate, 0, sizeof(state->m_mbstate));
|
||||
}
|
||||
|
||||
bool
|
||||
ARCH_STRING::isInitMBState(CArchMBState state)
|
||||
{
|
||||
return (mbsinit(&state->m_mbstate) != 0);
|
||||
}
|
||||
|
||||
int
|
||||
ARCH_STRING::convMBToWC(wchar_t* dst, const char* src,
|
||||
int n, CArchMBState state)
|
||||
{
|
||||
wchar_t dummy;
|
||||
return static_cast<int>(mbrtowc(dst != NULL ? dst : &dummy,
|
||||
src, static_cast<size_t>(n), &state->m_mbstate));
|
||||
}
|
||||
|
||||
int
|
||||
ARCH_STRING::convWCToMB(char* dst, wchar_t src, CArchMBState state)
|
||||
{
|
||||
char dummy[MB_LEN_MAX];
|
||||
return static_cast<int>(wcrtomb(dst != NULL ? dst : dummy,
|
||||
src, &state->m_mbstate));
|
||||
}
|
|
@ -16,23 +16,9 @@
|
|||
#define IARCHSTRING_H
|
||||
|
||||
#include "IInterface.h"
|
||||
#include "BasicTypes.h"
|
||||
#include <stdarg.h>
|
||||
|
||||
/*!
|
||||
\class CArchMBStateImpl
|
||||
\brief Internal multibyte conversion data.
|
||||
An architecture dependent type holding the necessary data for a
|
||||
multibyte to/from wide character conversion.
|
||||
*/
|
||||
class CArchMBStateImpl;
|
||||
|
||||
/*!
|
||||
\var CArchMBState
|
||||
\brief Opaque multibyte conversion state type.
|
||||
An opaque type representing multibyte conversion state.
|
||||
*/
|
||||
typedef CArchMBStateImpl* CArchMBState;
|
||||
|
||||
//! Interface for architecture dependent string operations
|
||||
/*!
|
||||
This interface defines the string operations required by
|
||||
|
@ -64,24 +50,13 @@ public:
|
|||
virtual int vsnprintf(char* str,
|
||||
int size, const char* fmt, va_list ap) = 0;
|
||||
|
||||
//! Create a new multibyte conversion state
|
||||
virtual CArchMBState newMBState() = 0;
|
||||
//! Convert multibyte string to wide character string
|
||||
virtual int convStringMBToWC(wchar_t*,
|
||||
const char*, UInt32 n, bool* errors) = 0;
|
||||
|
||||
//! Destroy a multibyte conversion state
|
||||
virtual void closeMBState(CArchMBState) = 0;
|
||||
|
||||
//! Initialize a multibyte conversion state
|
||||
virtual void initMBState(CArchMBState) = 0;
|
||||
|
||||
//! Test a multibyte conversion state
|
||||
virtual bool isInitMBState(CArchMBState) = 0;
|
||||
|
||||
//! Convert multibyte to wide character
|
||||
virtual int convMBToWC(wchar_t*,
|
||||
const char*, int, CArchMBState) = 0;
|
||||
|
||||
//! Convert wide character to multibyte
|
||||
virtual int convWCToMB(char*, wchar_t, CArchMBState) = 0;
|
||||
//! Convert wide character string to multibyte string
|
||||
virtual int convStringWCToMB(char*,
|
||||
const wchar_t*, UInt32 n, bool* errors) = 0;
|
||||
|
||||
//! Return the architecture's native wide character encoding
|
||||
virtual EWideCharEncoding
|
||||
|
|
|
@ -86,8 +86,6 @@ WIN32_SOURCE_FILES = \
|
|||
EXTRA_DIST = \
|
||||
arch.dsp \
|
||||
CMultibyte.cpp \
|
||||
CMultibyteEmu.cpp \
|
||||
CMultibyteOS.cpp \
|
||||
vsnprintf.cpp \
|
||||
$(UNIX_SOURCE_FILES) \
|
||||
$(WIN32_SOURCE_FILES) \
|
||||
|
|
|
@ -140,16 +140,6 @@ SOURCE=.\CMultibyte.cpp
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\CMultibyteEmu.cpp
|
||||
# PROP Exclude_From_Build 1
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\CMultibyteOS.cpp
|
||||
# PROP Exclude_From_Build 1
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\vsnprintf.cpp
|
||||
# PROP Exclude_From_Build 1
|
||||
# End Source File
|
||||
|
|
|
@ -220,57 +220,15 @@ CUnicode::UTF8ToText(const CString& src, bool* errors)
|
|||
UInt32 size;
|
||||
wchar_t* tmp = UTF8ToWideChar(src, size, errors);
|
||||
|
||||
// get length of multibyte string
|
||||
int mblen;
|
||||
CArchMBState state = ARCH->newMBState();
|
||||
size_t len = 0;
|
||||
UInt32 n = size;
|
||||
for (const wchar_t* scan = tmp; n > 0; ++scan, --n) {
|
||||
mblen = ARCH->convWCToMB(NULL, *scan, state);
|
||||
if (mblen == -1) {
|
||||
// unconvertable character
|
||||
setError(errors);
|
||||
len += 1;
|
||||
}
|
||||
else {
|
||||
len += mblen;
|
||||
}
|
||||
}
|
||||
|
||||
// handle nul terminator
|
||||
mblen = ARCH->convWCToMB(NULL, L'\0', state);
|
||||
if (mblen != -1) {
|
||||
len += mblen;
|
||||
}
|
||||
assert(ARCH->isInitMBState(state) != 0);
|
||||
|
||||
// allocate multibyte string
|
||||
char* mbs = new char[len];
|
||||
|
||||
// convert to multibyte
|
||||
char* dst = mbs;
|
||||
n = size;
|
||||
for (const wchar_t* scan = tmp; n > 0; ++scan, --n) {
|
||||
mblen = ARCH->convWCToMB(dst, *scan, state);
|
||||
if (mblen == -1) {
|
||||
// unconvertable character
|
||||
*dst++ = '?';
|
||||
}
|
||||
else {
|
||||
dst += mblen;
|
||||
}
|
||||
}
|
||||
mblen = ARCH->convWCToMB(dst, L'\0', state);
|
||||
if (mblen != -1) {
|
||||
// don't include nul terminator
|
||||
dst += mblen - 1;
|
||||
}
|
||||
CString text(mbs, dst - mbs);
|
||||
// convert string to multibyte
|
||||
int len = ARCH->convStringWCToMB(NULL, tmp, size, errors);
|
||||
char* mbs = new char[len + 1];
|
||||
ARCH->convStringWCToMB(mbs, tmp, size, errors);
|
||||
CString text(mbs, len);
|
||||
|
||||
// clean up
|
||||
delete[] mbs;
|
||||
delete[] tmp;
|
||||
ARCH->closeMBState(state);
|
||||
|
||||
return text;
|
||||
}
|
||||
|
@ -325,88 +283,17 @@ CUnicode::textToUTF8(const CString& src, bool* errors)
|
|||
// default to success
|
||||
resetError(errors);
|
||||
|
||||
// get length of multibyte string
|
||||
UInt32 n = src.size();
|
||||
size_t len = 0;
|
||||
CArchMBState state = ARCH->newMBState();
|
||||
for (const char* scan = src.c_str(); n > 0; ) {
|
||||
int mblen = ARCH->convMBToWC(NULL, scan, n, state);
|
||||
switch (mblen) {
|
||||
case -2:
|
||||
// incomplete last character. convert to unknown character.
|
||||
setError(errors);
|
||||
len += 1;
|
||||
n = 0;
|
||||
break;
|
||||
|
||||
case -1:
|
||||
// invalid character. count one unknown character and
|
||||
// start at the next byte.
|
||||
setError(errors);
|
||||
len += 1;
|
||||
scan += 1;
|
||||
n -= 1;
|
||||
break;
|
||||
|
||||
case 0:
|
||||
len += 1;
|
||||
scan += 1;
|
||||
n -= 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
// normal character
|
||||
len += 1;
|
||||
scan += mblen;
|
||||
n -= mblen;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ARCH->initMBState(state);
|
||||
|
||||
// allocate wide character string
|
||||
wchar_t* wcs = new wchar_t[len];
|
||||
|
||||
// convert multibyte to wide char
|
||||
n = src.size();
|
||||
wchar_t* dst = wcs;
|
||||
for (const char* scan = src.c_str(); n > 0; ++dst) {
|
||||
int mblen = ARCH->convMBToWC(dst, scan, n, state);
|
||||
switch (mblen) {
|
||||
case -2:
|
||||
// incomplete character. convert to unknown character.
|
||||
*dst = (wchar_t)0xfffd;
|
||||
n = 0;
|
||||
break;
|
||||
|
||||
case -1:
|
||||
// invalid character. count one unknown character and
|
||||
// start at the next byte.
|
||||
*dst = (wchar_t)0xfffd;
|
||||
scan += 1;
|
||||
n -= 1;
|
||||
break;
|
||||
|
||||
case 0:
|
||||
*dst = (wchar_t)0x0000;
|
||||
scan += 1;
|
||||
n -= 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
// normal character
|
||||
scan += mblen;
|
||||
n -= mblen;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// convert string to wide characters
|
||||
UInt32 n = src.size();
|
||||
int len = ARCH->convStringMBToWC(NULL, src.c_str(), n, errors);
|
||||
wchar_t* wcs = new wchar_t[len + 1];
|
||||
ARCH->convStringMBToWC(wcs, src.c_str(), n, errors);
|
||||
|
||||
// convert to UTF8
|
||||
CString utf8 = wideCharToUTF8(wcs, len, errors);
|
||||
|
||||
// clean up
|
||||
delete[] wcs;
|
||||
ARCH->closeMBState(state);
|
||||
|
||||
return utf8;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue