Hi all,
I've re-implemented the iconv transcode methods that return a newly
allocated string (the fixed-buffer variants are untouched) and attached
a patch to this e-mail.
The new implementation works as follows:
- Start with a string buffer of a fixed (tunable) initial size.
- Do the transcoding via wctomb and mbtowc; that is, the transcoding is
done one character at a time.
- Each time the allocated buffer turns out to be too small, it is
re-allocated at double its size.
I expect the following pros and cons:
Pros:
- Double speed (if not even better;) with small strings
- Complexity O(n) with huge strings. The overall speed performance with
large strings should always be better than before.
- String handling is done explicitly and locally, showing the maximum
possible performance, compared to some general string handling.
Cons:
There is a significant memory penalty, depending on the initial string
size:
- With very small strings (e.g. len=1), the memory eaten is several times
larger than the required minimum.
- With large strings, the statistically expected memory usage is 1.5
times larger than the required minimum. It is guaranteed not to be
worse than a factor of 2.0.
- String handling is done explicitly and locally, and should be
generalized, once the advantages have been proven.
The disadvantage of over-consuming memory could be eliminated by a
final, additional string allocation (with the exact required size) and a
copy operation. However, I have not implemented this yet, for
performance reasons. Eventually, I'd like to offer some options to the user
(e.g. through configure), so that she can choose the trade-off between
memory consumption and speed.
The iconv transcoder could be some kind of prototype for re-organizing
the other transcoders, too. I can't do much benchmarking here, so I hope
that some people will compare my new implementation to the old one, and
report their results.
If it is of interest here, I could post a complexity analysis of the
above algorithm that I made recently.
Cheers,
Axel
--
Humboldt-Universität zu Berlin
Institut für Informatik
Signalverarbeitung und Mustererkennung
Dipl.-Inf. Axel Weiß
Rudower Chaussee 25
12489 Berlin-Adlershof
+49-30-2093-3050
** www.freesp.de **
--- 3.0-unstable-orig/src/xercesc/util/Transcoders/Iconv/IconvTransService.cpp 2005-05-28 19:57:51.000000000 +0200
+++ 3.0-unstable/src/xercesc/util/Transcoders/Iconv/IconvTransService.cpp 2005-05-29 09:41:40.000000000 +0200
@@ -65,7 +65,7 @@
{
chLatin_I, chLatin_C, chLatin_o, chLatin_n, chLatin_v, chNull
};
-
+static const size_t gInitialStringSize = 32;
// ---------------------------------------------------------------------------
// Local methods
@@ -279,50 +279,15 @@
char* IconvLCPTranscoder::transcode(const XMLCh* const toTranscode)
{
- if (!toTranscode)
- return 0;
-
- char* retVal = 0;
- if (*toTranscode)
- {
- unsigned int wLent = getWideCharLength(toTranscode);
+ return transcode(toTranscode, XMLPlatformUtils::fgMemoryManager);
+}
- wchar_t tmpWideCharArr[gTempBuffArraySize];
- wchar_t* allocatedArray = 0;
- wchar_t* wideCharBuf = 0;
-
- if (wLent >= gTempBuffArraySize)
- wideCharBuf = allocatedArray = new wchar_t[wLent + 1];
- else
- wideCharBuf = tmpWideCharArr;
-
- for (unsigned int i = 0; i < wLent; i++)
- {
- wideCharBuf[i] = toTranscode[i];
- }
- wideCharBuf[wLent] = 0x00;
-
- // Calc the needed size.
- const size_t neededLen = ::wcstombs(NULL, wideCharBuf, 0);
- if (neededLen == -1)
- {
- delete [] allocatedArray;
- retVal = new char[1];
- retVal[0] = 0;
- return retVal;
- }
-
- retVal = new char[neededLen + 1];
- ::wcstombs(retVal, wideCharBuf, neededLen);
- retVal[neededLen] = 0;
- delete [] allocatedArray;
- }
- else
- {
- retVal = new char[1];
- retVal[0] = 0;
- }
- return retVal;
+static void realloc_string(char *&string, size_t &size, MemoryManager* const manager){
+ char* tmp = (char*)manager->allocate(2 * size * sizeof(char));
+ memcpy(tmp, string, size);
+ manager->deallocate(string);
+ string = tmp;
+ size *= 2;
}
char* IconvLCPTranscoder::transcode(const XMLCh* const toTranscode,
@@ -331,50 +296,28 @@
if (!toTranscode)
return 0;
- char* retVal = 0;
- if (*toTranscode)
- {
- unsigned int wLent = getWideCharLength(toTranscode);
-
- wchar_t tmpWideCharArr[gTempBuffArraySize];
- wchar_t* allocatedArray = 0;
- wchar_t* wideCharBuf = 0;
-
- if (wLent >= gTempBuffArraySize)
- wideCharBuf = allocatedArray = (wchar_t*) manager->allocate
- (
- (wLent + 1) * sizeof(wchar_t)
- );//new wchar_t[wLent + 1];
- else
- wideCharBuf = tmpWideCharArr;
+ const XMLCh *pSrc;
+ size_t retCursor = 0, retSize = gInitialStringSize;
+ char* retString = (char*)manager->allocate(retSize * sizeof(char));
+
+ for (pSrc=toTranscode; *pSrc!=L'\0'; ++pSrc){
+ char tmpBuf[MB_CUR_MAX];
+ int tmpLen = ::wctomb(tmpBuf, *pSrc);
+ if (tmpLen < 0){
+ retCursor = 0;
+ break;
+ }
+ // ensure that the terminating NULL byte
+ // can fit into retString:
+ if (retCursor + tmpLen >= retSize - 1)
+ realloc_string(retString, retSize, manager);
+ int tmpCursor;
+ for (tmpCursor=0; tmpCursor<tmpLen; ++tmpCursor)
+ retString[retCursor++] = tmpBuf[tmpCursor];
+ }
- for (unsigned int i = 0; i < wLent; i++)
- {
- wideCharBuf[i] = toTranscode[i];
- }
- wideCharBuf[wLent] = 0x00;
-
- // Calc the needed size.
- const size_t neededLen = ::wcstombs(NULL, wideCharBuf, 0);
- if (neededLen == -1)
- {
- manager->deallocate(allocatedArray);//delete [] allocatedArray;
- retVal = (char*) manager->allocate(sizeof(char)); //new char[1];
- retVal[0] = 0;
- return retVal;
- }
-
- retVal = (char*) manager->allocate((neededLen + 1) * sizeof(char));//new char[neededLen + 1];
- ::wcstombs(retVal, wideCharBuf, neededLen);
- retVal[neededLen] = 0;
- manager->deallocate(allocatedArray);//delete [] allocatedArray;
- }
- else
- {
- retVal = (char*) manager->allocate(sizeof(char));//new char[1];
- retVal[0] = 0;
- }
- return retVal;
+ retString[retCursor] = '\0';
+ return retString;
}
@@ -439,44 +382,15 @@
XMLCh* IconvLCPTranscoder::transcode(const char* const toTranscode)
{
- if (!toTranscode)
- return 0;
+ return transcode(toTranscode, XMLPlatformUtils::fgMemoryManager);
+}
- XMLCh* retVal = 0;
- if (*toTranscode)
- {
- const unsigned int len = calcRequiredSize(toTranscode);
- if (len == 0)
- {
- retVal = new XMLCh[1];
- retVal[0] = 0;
- return retVal;
- }
-
- wchar_t tmpWideCharArr[gTempBuffArraySize];
- wchar_t* allocatedArray = 0;
- wchar_t* wideCharBuf = 0;
-
- if (len >= gTempBuffArraySize)
- wideCharBuf = allocatedArray = new wchar_t[len + 1];
- else
- wideCharBuf = tmpWideCharArr;
-
- ::mbstowcs(wideCharBuf, toTranscode, len);
- retVal = new XMLCh[len + 1];
- for (unsigned int i = 0; i < len; i++)
- {
- retVal[i] = (XMLCh) wideCharBuf[i];
- }
- retVal[len] = 0x00;
- delete [] allocatedArray;
- }
- else
- {
- retVal = new XMLCh[1];
- retVal[0] = 0;
- }
- return retVal;
+static void realloc_xmlch_string(XMLCh *&string, size_t &size, MemoryManager* const manager){
+ XMLCh* tmp = (XMLCh*)manager->allocate(2 * size * sizeof(XMLCh));
+ memcpy(tmp, string, size * sizeof(XMLCh));
+ manager->deallocate(string);
+ string = tmp;
+ size *= 2;
}
XMLCh* IconvLCPTranscoder::transcode(const char* const toTranscode,
@@ -485,44 +399,27 @@
if (!toTranscode)
return 0;
- XMLCh* retVal = 0;
- if (*toTranscode)
- {
- const unsigned int len = calcRequiredSize(toTranscode, manager);
- if (len == 0)
- {
- retVal = (XMLCh*) manager->allocate(sizeof(XMLCh)); //new XMLCh[1];
- retVal[0] = 0;
- return retVal;
- }
-
- wchar_t tmpWideCharArr[gTempBuffArraySize];
- wchar_t* allocatedArray = 0;
- wchar_t* wideCharBuf = 0;
+ size_t retCursor = 0, retSize = gInitialStringSize, srcLen = strlen(toTranscode), srcCursor = 0;
+ XMLCh* retString = (XMLCh*)manager->allocate(retSize * sizeof(XMLCh));
- if (len >= gTempBuffArraySize)
- wideCharBuf = allocatedArray = (wchar_t*) manager->allocate
- (
- (len + 1) * sizeof(wchar_t)
- );//new wchar_t[len + 1];
- else
- wideCharBuf = tmpWideCharArr;
-
- ::mbstowcs(wideCharBuf, toTranscode, len);
- retVal = (XMLCh*) manager->allocate((len + 1) *sizeof(XMLCh));//new XMLCh[len + 1];
- for (unsigned int i = 0; i < len; i++)
- {
- retVal[i] = (XMLCh) wideCharBuf[i];
- }
- retVal[len] = 0x00;
- manager->deallocate(allocatedArray);//delete [] allocatedArray;
- }
- else
- {
- retVal = (XMLCh*) manager->allocate(sizeof(XMLCh));//new XMLCh[1];
- retVal[0] = 0;
- }
- return retVal;
+ for ( ;; ){
+ wchar_t tmpBuf[1];
+ int consumed = ::mbtowc(tmpBuf, toTranscode + srcCursor, srcLen - srcCursor);
+ if (consumed <= 0){
+ if (consumed < 0)
+ retCursor = 0;
+ break;
+ }
+ // ensure that the terminating NULL byte
+ // can fit into retString:
+ if (retCursor + 1 >= retSize - 1)
+ realloc_xmlch_string(retString, retSize, manager);
+ retString[retCursor++] = tmpBuf[0];
+ srcCursor += consumed;
+ }
+
+ retString[retCursor] = L'\0';
+ return retString;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]