Removed use of mbrtowc, wcrtomb, and mbsinit. Many platforms
didn't support them and the emulated versions were just as good
except for a performance problem with excessive locking and
unlocking of a mutex.  So this also changes IArchString to
provide string rather than character conversion so we can lock
the mutex once per string rather than once per character.
This commit is contained in:
crs 2004-04-13 19:39:04 +00:00
parent cc577dce7c
commit 54b3884eba
14 changed files with 216 additions and 435 deletions

View File

@ -142,7 +142,6 @@ AC_FUNC_STRFTIME
AC_CHECK_FUNCS(gmtime_r)
ACX_CHECK_GETPWUID_R
AC_CHECK_FUNCS(vsnprintf)
AC_CHECK_FUNCS(wcrtomb mbrtowc mbsinit)
AC_FUNC_SELECT_ARGTYPES
ACX_CHECK_POLL
dnl use AC_REPLACE_FUNCS() for stuff in string.h

View File

@ -561,40 +561,16 @@ CArch::vsnprintf(char* str, int size, const char* fmt, va_list ap)
return m_string->vsnprintf(str, size, fmt, ap);
}
CArchMBState
CArch::newMBState()
int
CArch::convStringMBToWC(wchar_t* dst, const char* src, UInt32 n, bool* errors)
{
return m_string->newMBState();
}
void
CArch::closeMBState(CArchMBState state)
{
m_string->closeMBState(state);
}
void
CArch::initMBState(CArchMBState state)
{
m_string->initMBState(state);
}
bool
CArch::isInitMBState(CArchMBState state)
{
return m_string->isInitMBState(state);
return m_string->convStringMBToWC(dst, src, n, errors);
}
int
CArch::convMBToWC(wchar_t* dst, const char* src, int n, CArchMBState state)
CArch::convStringWCToMB(char* dst, const wchar_t* src, UInt32 n, bool* errors)
{
return m_string->convMBToWC(dst, src, n, state);
}
int
CArch::convWCToMB(char* dst, wchar_t src, CArchMBState state)
{
return m_string->convWCToMB(dst, src, state);
return m_string->convStringWCToMB(dst, src, n, errors);
}
IArchString::EWideCharEncoding

View File

@ -159,12 +159,10 @@ public:
// IArchString overrides
virtual int vsnprintf(char* str,
int size, const char* fmt, va_list ap);
virtual CArchMBState newMBState();
virtual void closeMBState(CArchMBState);
virtual void initMBState(CArchMBState);
virtual bool isInitMBState(CArchMBState);
virtual int convMBToWC(wchar_t*, const char*, int, CArchMBState);
virtual int convWCToMB(char*, wchar_t, CArchMBState);
virtual int convStringMBToWC(wchar_t*,
const char*, UInt32 n, bool* errors);
virtual int convStringWCToMB(char*,
const wchar_t*, UInt32 n, bool* errors);
virtual EWideCharEncoding
getWideCharEncoding();

View File

@ -15,22 +15,11 @@
#include "CArchStringUnix.h"
#include <stdio.h>
#include "CMultibyte.cpp"
//
// CArchStringUnix
//
CArchStringUnix::CArchStringUnix()
{
initMB();
}
CArchStringUnix::~CArchStringUnix()
{
cleanMB();
}
#include "CMultibyte.cpp"
#include "vsnprintf.cpp"
IArchString::EWideCharEncoding

View File

@ -28,12 +28,10 @@ public:
// IArchString overrides
virtual int vsnprintf(char* str,
int size, const char* fmt, va_list ap);
virtual CArchMBState newMBState();
virtual void closeMBState(CArchMBState);
virtual void initMBState(CArchMBState);
virtual bool isInitMBState(CArchMBState);
virtual int convMBToWC(wchar_t*, const char*, int, CArchMBState);
virtual int convWCToMB(char*, wchar_t, CArchMBState);
virtual int convStringMBToWC(wchar_t*,
const char*, UInt32 n, bool* errors);
virtual int convStringWCToMB(char*,
const wchar_t*, UInt32 n, bool* errors);
virtual EWideCharEncoding
getWideCharEncoding();
};

View File

@ -18,22 +18,11 @@
#include <windows.h>
#include <stdio.h>
#include "CMultibyte.cpp"
//
// CArchStringWindows
//
CArchStringWindows::CArchStringWindows()
{
initMB();
}
CArchStringWindows::~CArchStringWindows()
{
cleanMB();
}
#include "CMultibyte.cpp"
#define HAVE_VSNPRINTF 1
#define ARCH_VSNPRINTF _vsnprintf
#include "vsnprintf.cpp"

View File

@ -28,12 +28,10 @@ public:
// IArchString overrides
virtual int vsnprintf(char* str,
int size, const char* fmt, va_list ap);
virtual CArchMBState newMBState();
virtual void closeMBState(CArchMBState);
virtual void initMBState(CArchMBState);
virtual bool isInitMBState(CArchMBState);
virtual int convMBToWC(wchar_t*, const char*, int, CArchMBState);
virtual int convWCToMB(char*, wchar_t, CArchMBState);
virtual int convStringMBToWC(wchar_t*,
const char*, UInt32 n, bool* errors);
virtual int convStringWCToMB(char*,
const wchar_t*, UInt32 n, bool* errors);
virtual EWideCharEncoding
getWideCharEncoding();
};

View File

@ -16,25 +16,192 @@
#define CMULTIBYTE_H
#include "common.h"
#if (HAVE_MBSINIT && HAVE_MBRTOWC && HAVE_WCRTOMB) || defined(_MSC_VER)
#include "CMultibyteOS.cpp"
#include "CArch.h"
#include <limits.h>
#include <string.h>
#if HAVE_WCHAR_H
# include <wchar.h>
#elif __APPLE__
// wtf? Darwin puts mbtowc() et al. in stdlib
# include <stdlib.h>
#else
#include "CMultibyteEmu.cpp"
#endif
// platform apparently has no wchar_t support. provide dummy
// implementations. hopefully at least the C++ compiler has
// a built-in wchar_t type.
CArchMBState
ARCH_STRING::newMBState()
static inline
int
mbtowc(wchar_t* dst, const char* src, int n)
{
CArchMBState state = new CArchMBStateImpl;
initMBState(state);
return state;
*dst = static_cast<wchar_t>(*src);
return 1;
}
void
ARCH_STRING::closeMBState(CArchMBState state)
static inline
int
wctomb(char* dst, wchar_t src)
{
delete state;
*dst = static_cast<char>(src);
return 1;
}
#endif
//
// use C library non-reentrant multibyte conversion with mutex
//
static CArchMutex s_mutex = NULL;
// Creates the mutex that the string conversion methods below lock
// around their calls to the C library's non-reentrant multibyte
// functions.  s_mutex is a file-level static, so the handle is shared
// rather than stored per instance.
ARCH_STRING::ARCH_STRING()
{
s_mutex = ARCH->newMutex();
}
// Closes the shared conversion mutex and resets the file-level handle
// to NULL so no stale value is left behind.
ARCH_STRING::~ARCH_STRING()
{
ARCH->closeMutex(s_mutex);
s_mutex = NULL;
}
// Convert the n wide characters at src to a multibyte string using
// the C library's wctomb().
//
// dst    - destination buffer, or NULL to only compute the length.
//          wctomb() also writes the nul terminator, so a non-NULL dst
//          needs room for the returned length plus the terminator.
// src    - wide characters to convert (n of them; need not be
//          nul terminated).
// errors - if not NULL, set to true when a character cannot be
//          converted.  It is never set to false here.
//
// Returns the length in bytes of the multibyte string, not counting
// the nul terminator.  Each unconvertible character is replaced by a
// single '?'.
//
// The whole string is converted under one lock of s_mutex because
// wctomb() is not reentrant.
int
ARCH_STRING::convStringWCToMB(char* dst,
				const wchar_t* src, UInt32 n, bool* errors)
{
	bool dummyErrors;
	if (errors == NULL) {
		errors = &dummyErrors;
	}

	// when only measuring, convert into a scratch buffer.  the same
	// loop serves both passes so the measured length always matches
	// what the writing pass produces.
	char scratch[MB_LEN_MAX];
	int len = 0;

	ARCH->lockMutex(s_mutex);

	for (const wchar_t* scan = src; n > 0; ++scan, --n) {
		char* out = (dst != NULL) ? (dst + len) : scratch;
		int mblen = wctomb(out, *scan);
		if (mblen == -1) {
			// unconvertible character becomes a single '?'
			*errors = true;
			*out    = '?';
			mblen   = 1;
		}
		len += mblen;
	}

	// convert the terminator.  this must go through a real buffer:
	// wctomb(NULL, ...) does not return a length, it only reports
	// whether the encoding is state dependent (0 for stateless
	// encodings), which would make the measured length one too small.
	int mblen = wctomb((dst != NULL) ? (dst + len) : scratch, L'\0');
	if (mblen != -1) {
		// count any shift sequence but not the nul terminator itself
		len += mblen - 1;
	}

	ARCH->unlockMutex(s_mutex);

	return len;
}
// Convert the n bytes of multibyte text at src to wide characters
// using the C library's mbtowc().
//
// dst    - destination buffer, or NULL to only compute the length.
//          No terminator is written.
// src    - multibyte text to convert (n bytes; need not be
//          nul terminated).
// errors - if not NULL, set to true when an invalid or incomplete
//          character is found.  It is never set to false here.
//
// Returns the number of wide characters produced.  Each invalid byte
// and an incomplete trailing character become one 0xfffd; an embedded
// nul becomes one 0x0000.
//
// The whole string is converted under one lock of s_mutex because
// mbtowc() is not reentrant.
int
ARCH_STRING::convStringMBToWC(wchar_t* dst,
				const char* src, UInt32 n, bool* errors)
{
	bool dummyErrors;
	if (errors == NULL) {
		errors = &dummyErrors;
	}

	// when only measuring, convert into a scratch character.  the same
	// loop serves both passes so the measured length always matches
	// what the writing pass produces.
	wchar_t scratch;
	int len = 0;

	ARCH->lockMutex(s_mutex);

	// every iteration yields exactly one wide character
	for (const char* scan = src; n > 0; ++len) {
		wchar_t* out = (dst != NULL) ? (dst + len) : &scratch;
		int mblen = mbtowc(out, scan, n);
		switch (mblen) {
		case -2:
			// incomplete last character.  convert to unknown character
			// and stop.
			*errors = true;
			*out    = static_cast<wchar_t>(0xfffd);
			n       = 0;
			break;

		case -1:
			// invalid character.  emit one unknown character and
			// start again at the next byte.
			*errors = true;
			*out    = static_cast<wchar_t>(0xfffd);
			scan   += 1;
			n      -= 1;
			break;

		case 0:
			// embedded nul
			*out  = static_cast<wchar_t>(0x0000);
			scan += 1;
			n    -= 1;
			break;

		default:
			// normal character; mbtowc() already stored it
			scan += mblen;
			n    -= mblen;
			break;
		}
	}

	ARCH->unlockMutex(s_mutex);

	return len;
}
#endif

View File

@ -1,114 +0,0 @@
/*
* synergy -- mouse and keyboard sharing utility
* Copyright (C) 2002 Chris Schoeneman
*
* This package is free software you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* found in the file COPYING that should have accompanied this file.
*
* This package is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include "CArch.h"
#include <string.h>
#if HAVE_WCHAR_H
# include <wchar.h>
#elif __APPLE__
// wtf? Darwin puts mbtowc() et al. in stdlib
# include <stdlib.h>
#else
// platform apparently has no wchar_t support. provide dummy
// implementations. hopefully at least the C++ compiler has
// a built-in wchar_t type.
# undef HAVE_MBSINIT
static inline
int
mbtowc(wchar_t* dst, const char* src, int n)
{
*dst = static_cast<wchar_t>(*src);
return 1;
}
static inline
int
wctomb(char* dst, wchar_t src)
{
*dst = static_cast<char>(src);
return 1;
}
#endif
class CArchMBStateImpl {
public:
#if HAVE_MBSINIT
mbstate_t m_mbstate;
#else
int m_mbstate; // dummy data
#endif
};
//
// use C library non-reentrant multibyte conversion with mutex
//
static CArchMutex s_mutex;
static
void
initMB()
{
s_mutex = ARCH->newMutex();
}
static
void
cleanMB()
{
ARCH->closeMutex(s_mutex);
}
void
ARCH_STRING::initMBState(CArchMBState state)
{
memset(&state->m_mbstate, 0, sizeof(state->m_mbstate));
}
bool
ARCH_STRING::isInitMBState(CArchMBState state)
{
// if we're using this file mbsinit() probably doesn't exist
// but use it if it does
#if HAVE_MBSINIT
return (mbsinit(&state->m_mbstate) != 0);
#else
return true;
#endif
}
int
ARCH_STRING::convMBToWC(wchar_t* dst, const char* src, int n, CArchMBState)
{
wchar_t dummy;
ARCH->lockMutex(s_mutex);
int result = mbtowc(dst != NULL ? dst : &dummy, src, n);
ARCH->unlockMutex(s_mutex);
if (result < 0)
return -1;
else
return result;
}
int
ARCH_STRING::convWCToMB(char* dst, wchar_t src, CArchMBState)
{
char dummy[MB_LEN_MAX];
ARCH->lockMutex(s_mutex);
int n = wctomb(dst != NULL ? dst : dummy, src);
ARCH->unlockMutex(s_mutex);
return n;
}

View File

@ -1,69 +0,0 @@
/*
* synergy -- mouse and keyboard sharing utility
* Copyright (C) 2002 Chris Schoeneman
*
* This package is free software you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* found in the file COPYING that should have accompanied this file.
*
* This package is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <string.h>
#include <cwchar>
#include <limits.h>
class CArchMBStateImpl {
public:
mbstate_t m_mbstate;
};
//
// use C library reentrant multibyte conversion
//
static
void
initMB()
{
// do nothing
}
static
void
cleanMB()
{
// do nothing
}
void
ARCH_STRING::initMBState(CArchMBState state)
{
memset(&state->m_mbstate, 0, sizeof(state->m_mbstate));
}
bool
ARCH_STRING::isInitMBState(CArchMBState state)
{
return (mbsinit(&state->m_mbstate) != 0);
}
int
ARCH_STRING::convMBToWC(wchar_t* dst, const char* src,
int n, CArchMBState state)
{
wchar_t dummy;
return static_cast<int>(mbrtowc(dst != NULL ? dst : &dummy,
src, static_cast<size_t>(n), &state->m_mbstate));
}
int
ARCH_STRING::convWCToMB(char* dst, wchar_t src, CArchMBState state)
{
char dummy[MB_LEN_MAX];
return static_cast<int>(wcrtomb(dst != NULL ? dst : dummy,
src, &state->m_mbstate));
}

View File

@ -16,23 +16,9 @@
#define IARCHSTRING_H
#include "IInterface.h"
#include "BasicTypes.h"
#include <stdarg.h>
/*!
\class CArchMBStateImpl
\brief Internal multibyte conversion data.
An architecture dependent type holding the necessary data for a
multibyte to/from wide character conversion.
*/
class CArchMBStateImpl;
/*!
\var CArchMBState
\brief Opaque multibyte conversion state type.
An opaque type representing multibyte conversion state.
*/
typedef CArchMBStateImpl* CArchMBState;
//! Interface for architecture dependent string operations
/*!
This interface defines the string operations required by
@ -64,24 +50,13 @@ public:
virtual int vsnprintf(char* str,
int size, const char* fmt, va_list ap) = 0;
//! Create a new multibyte conversion state
virtual CArchMBState newMBState() = 0;
//! Convert multibyte string to wide character string
virtual int convStringMBToWC(wchar_t*,
const char*, UInt32 n, bool* errors) = 0;
//! Destroy a multibyte conversion state
virtual void closeMBState(CArchMBState) = 0;
//! Initialize a multibyte conversion state
virtual void initMBState(CArchMBState) = 0;
//! Test a multibyte conversion state
virtual bool isInitMBState(CArchMBState) = 0;
//! Convert multibyte to wide character
virtual int convMBToWC(wchar_t*,
const char*, int, CArchMBState) = 0;
//! Convert wide character to multibyte
virtual int convWCToMB(char*, wchar_t, CArchMBState) = 0;
//! Convert wide character string to multibyte string
virtual int convStringWCToMB(char*,
const wchar_t*, UInt32 n, bool* errors) = 0;
//! Return the architecture's native wide character encoding
virtual EWideCharEncoding

View File

@ -86,8 +86,6 @@ WIN32_SOURCE_FILES = \
EXTRA_DIST = \
arch.dsp \
CMultibyte.cpp \
CMultibyteEmu.cpp \
CMultibyteOS.cpp \
vsnprintf.cpp \
$(UNIX_SOURCE_FILES) \
$(WIN32_SOURCE_FILES) \

View File

@ -140,16 +140,6 @@ SOURCE=.\CMultibyte.cpp
# End Source File
# Begin Source File
SOURCE=.\CMultibyteEmu.cpp
# PROP Exclude_From_Build 1
# End Source File
# Begin Source File
SOURCE=.\CMultibyteOS.cpp
# PROP Exclude_From_Build 1
# End Source File
# Begin Source File
SOURCE=.\vsnprintf.cpp
# PROP Exclude_From_Build 1
# End Source File

View File

@ -220,57 +220,15 @@ CUnicode::UTF8ToText(const CString& src, bool* errors)
UInt32 size;
wchar_t* tmp = UTF8ToWideChar(src, size, errors);
// get length of multibyte string
int mblen;
CArchMBState state = ARCH->newMBState();
size_t len = 0;
UInt32 n = size;
for (const wchar_t* scan = tmp; n > 0; ++scan, --n) {
mblen = ARCH->convWCToMB(NULL, *scan, state);
if (mblen == -1) {
// unconvertable character
setError(errors);
len += 1;
}
else {
len += mblen;
}
}
// handle nul terminator
mblen = ARCH->convWCToMB(NULL, L'\0', state);
if (mblen != -1) {
len += mblen;
}
assert(ARCH->isInitMBState(state) != 0);
// allocate multibyte string
char* mbs = new char[len];
// convert to multibyte
char* dst = mbs;
n = size;
for (const wchar_t* scan = tmp; n > 0; ++scan, --n) {
mblen = ARCH->convWCToMB(dst, *scan, state);
if (mblen == -1) {
// unconvertable character
*dst++ = '?';
}
else {
dst += mblen;
}
}
mblen = ARCH->convWCToMB(dst, L'\0', state);
if (mblen != -1) {
// don't include nul terminator
dst += mblen - 1;
}
CString text(mbs, dst - mbs);
// convert string to multibyte
int len = ARCH->convStringWCToMB(NULL, tmp, size, errors);
char* mbs = new char[len + 1];
ARCH->convStringWCToMB(mbs, tmp, size, errors);
CString text(mbs, len);
// clean up
delete[] mbs;
delete[] tmp;
ARCH->closeMBState(state);
return text;
}
@ -325,88 +283,17 @@ CUnicode::textToUTF8(const CString& src, bool* errors)
// default to success
resetError(errors);
// get length of multibyte string
UInt32 n = src.size();
size_t len = 0;
CArchMBState state = ARCH->newMBState();
for (const char* scan = src.c_str(); n > 0; ) {
int mblen = ARCH->convMBToWC(NULL, scan, n, state);
switch (mblen) {
case -2:
// incomplete last character. convert to unknown character.
setError(errors);
len += 1;
n = 0;
break;
case -1:
// invalid character. count one unknown character and
// start at the next byte.
setError(errors);
len += 1;
scan += 1;
n -= 1;
break;
case 0:
len += 1;
scan += 1;
n -= 1;
break;
default:
// normal character
len += 1;
scan += mblen;
n -= mblen;
break;
}
}
ARCH->initMBState(state);
// allocate wide character string
wchar_t* wcs = new wchar_t[len];
// convert multibyte to wide char
n = src.size();
wchar_t* dst = wcs;
for (const char* scan = src.c_str(); n > 0; ++dst) {
int mblen = ARCH->convMBToWC(dst, scan, n, state);
switch (mblen) {
case -2:
// incomplete character. convert to unknown character.
*dst = (wchar_t)0xfffd;
n = 0;
break;
case -1:
// invalid character. count one unknown character and
// start at the next byte.
*dst = (wchar_t)0xfffd;
scan += 1;
n -= 1;
break;
case 0:
*dst = (wchar_t)0x0000;
scan += 1;
n -= 1;
break;
default:
// normal character
scan += mblen;
n -= mblen;
break;
}
}
// convert string to wide characters
UInt32 n = src.size();
int len = ARCH->convStringMBToWC(NULL, src.c_str(), n, errors);
wchar_t* wcs = new wchar_t[len + 1];
ARCH->convStringMBToWC(wcs, src.c_str(), n, errors);
// convert to UTF8
CString utf8 = wideCharToUTF8(wcs, len, errors);
// clean up
delete[] wcs;
ARCH->closeMBState(state);
return utf8;
}