Date: Monday, December 19, 2005 @ 16:11:14
  Author: marc
    Path: /cvsroot/carob/carob

Modified: include/JavaSocket.hpp (1.16 -> 1.17) src/DriverSocket.cpp (1.10
          -> 1.11) src/JavaSocket.cpp (1.22 -> 1.23)

Fix CAROB-49: we now send and receive real UTF-8 to and from the controller.
String conversion no longer depends on the global locale, and nulls inside
strings are now supported as well. The controller does not yet support nulls
inside strings; see SEQUOIA-133.


------------------------+
 include/JavaSocket.hpp |   12 +++---
 src/DriverSocket.cpp   |    5 ++
 src/JavaSocket.cpp     |   88 +++++++++++++++++------------------------------
 3 files changed, 43 insertions(+), 62 deletions(-)


Index: carob/include/JavaSocket.hpp
diff -u carob/include/JavaSocket.hpp:1.16 carob/include/JavaSocket.hpp:1.17
--- carob/include/JavaSocket.hpp:1.16   Tue Dec 13 16:08:30 2005
+++ carob/include/JavaSocket.hpp        Mon Dec 19 16:11:14 2005
@@ -85,21 +85,23 @@
   virtual bool  closeSocket() throw (SocketIOException, UnexpectedException);
   
   /**
-   * Writes a string to the socket
+   * Writes a UTF-8 encoded wstring to the socket. This is NOT Java's
+   * modified UTF-8, see SEQUOIA-133.
    * @param str string to write
    * @return the number of bytes send
    * @throws SocketIOException
    */
   size_t        writeJavaUTF(const std::wstring& str) const
-                    throw (SocketIOException, UnexpectedException);
+    throw (SocketIOException, CodecException, UnexpectedException);
   /**
-   * Reads java encoded string from socket.
+   * Reads UTF-8 encoded string from socket. This is NOT Java's
+   * modified UTF-8, see SEQUOIA-133.
    * @param str string to read
    * @return the length read in bytes
    * @throws SocketIOException
    */
-  size_t        readJavaUTF(std::wstring& str) const throw (SocketIOException,
-                    UnexpectedException);
+  size_t        readJavaUTF(std::wstring& str) const
+    throw (SocketIOException, CodecException, UnexpectedException);
   /**
    * Writes 32bits-integer to socket
    * @param i integer to send
Index: carob/src/DriverSocket.cpp
diff -u carob/src/DriverSocket.cpp:1.10 carob/src/DriverSocket.cpp:1.11
--- carob/src/DriverSocket.cpp:1.10     Tue Dec 13 16:08:30 2005
+++ carob/src/DriverSocket.cpp  Mon Dec 19 16:11:14 2005
@@ -50,6 +50,7 @@
   size_t strLen = s.size();
   *this<<(int32_t)strLen;
   size_t totalWritten = 0;
+  // FIXME: catch CodecException here
   while (totalWritten < strLen)
   {
     totalWritten += writeJavaUTF(s);
@@ -74,7 +75,7 @@
   *this>>ack;
   if (ack)
   {
-    //First reads the size of the upcomming string, then the string itself.
+    //First reads the size of the upcoming string, then the string itself.
     int strSize;
     *this>>strSize;
     if (strSize > 0)
@@ -82,8 +83,10 @@
       s.reserve(strSize+1);
       size_t sizeRead = 0;
       //We must read strSize character, which can be done in several passes
+      // Actually not! - MH
       while ((int)sizeRead<strSize)
       {
+       // FIXME: catch CodecException here
         sizeRead += readJavaUTF(s);
       }
     }
Index: carob/src/JavaSocket.cpp
diff -u carob/src/JavaSocket.cpp:1.22 carob/src/JavaSocket.cpp:1.23
--- carob/src/JavaSocket.cpp:1.22       Fri Dec 16 21:27:26 2005
+++ carob/src/JavaSocket.cpp    Mon Dec 19 16:11:14 2005
@@ -185,54 +185,34 @@
 // /usr/share/doc/gcc-4.0-base/libstdc++/html/documentation.html
 
 size_t JavaSocket::writeJavaUTF(const wstring& str) const
-    throw (SocketIOException, UnexpectedException)
+  throw (SocketIOException, CodecException, UnexpectedException)
 {
   wstring fctName(L"JavaSocket::writeJavaUTF");
-  // Size of the wide string in bytes, including the wide terminating null
-  uint16_t strlenPlusOne = (str.length()+1)*sizeof(wchar_t);
-  
-  // FIXME: we don't support inline zeros. We should get rid of c_str
-  const wchar_t* oriStr = str.c_str();
- // guessing the max converted size like this is hopelessly wrong. By chance 
it's usually enough.
-  uint8_t* utfStr = new uint8_t[strlenPlusOne];
-  size_t countConverted;
-  mbstate_t mbstate = {0};
-
   if (isVerboseEnabled())
     logVerbose(fctName, L"Converting string...");
-  // FIXME: don't use this locale converter, we need to send UTF-8!
-  // we should use: toUTF8() instead
-  countConverted = wcsrtombs((char*)utfStr, &oriStr,
-                             strlenPlusOne, &mbstate);
-  if (countConverted == (size_t)-1)
-  {
-    throw SocketIOException(fctName + L"An encoding error occured while 
converting the string.");
-  }
-  else if (countConverted > 0) //don't send anything if empty string
-  {
-    if (isVerboseEnabled())
-      logVerbose(fctName, L"String was successfuly converted, sending string 
length");
 
-    uint16_t netlen = htons(countConverted);
+  std::string utf8str(toUTF8(str));
 
-    //First write number of bytes to follow as 
-    if (sendToSocket(fctName, L"UTF string", &netlen, sizeof(netlen), 
MSG_NOSIGNAL))
-    {
-      if (isVerboseEnabled())
-        logVerbose(fctName, L"Ok. Sending string");
-      // Send the encoded string (wcsrtombs() does not count the terminating 
zero)
-      sendToSocket(fctName, L"UTF string", utfStr, countConverted, 
MSG_NOSIGNAL);
-    }
+  // FIXME: move netlen to uint32_t once SEQUOIA-133 is complete
+  uint16_t netlen = htons(utf8str.length());
+
+  //First write number of bytes to follow as 
+  if (sendToSocket(fctName, L"UTF string", &netlen, sizeof(netlen), 
MSG_NOSIGNAL))
+  {
+    if (isVerboseEnabled())
+      logVerbose(fctName, L"Ok. Sending string");
+    sendToSocket(fctName, L"UTF string", utf8str.data(), utf8str.length(), 
MSG_NOSIGNAL);
   }
-  delete[] utfStr;
-  return countConverted;
+
+  return utf8str.length();
 }
 
-size_t JavaSocket::readJavaUTF(wstring& s) const throw (SocketIOException,
-    UnexpectedException)
+size_t JavaSocket::readJavaUTF(wstring& s) const
+  throw (SocketIOException, CodecException, UnexpectedException)
 {
   wstring fctName(L"JavaSocket::readJavaUTF");
   
+  // FIXME: move lenRecNet to uint32_t once SEQUOIA-133 is complete
   //the size read on the network
   uint16_t lenRecNet;
   //the converted size
@@ -242,30 +222,18 @@
   if (receiveFromSocket(fctName, L"UTF string size", &lenRecNet, 
sizeof(lenRecNet), 0))
   {
     lenRec = ntohs(lenRecNet); // number of bytes to come
-  
-    uint8_t* utfStr = new uint8_t[lenRec+1]; // add a zero to C-terminate it
-    utfStr[lenRec] = 0;
+
+    uint8_t* utfStr = new uint8_t[lenRec];
     if (receiveFromSocket(fctName, L"UTF string", utfStr, lenRec, 0))
     {
-      // FIXME: we don't support inline zeros. We should get rid of c_str
-      std::string received((const char*)utfStr);
+      const std::string received((const char*)utfStr, lenRec);
 
-      // FIXME: don't use this locale converter cause received is ALWAYS UTF-8!
-      // this is correct:
-      // s = fromUTF8(received);
-      s = fromString(received);
-
-#if 0 // FIXME catch conversion exception
-      {
-        delete[] utfStr;
-        throw SocketIOException(fctName
-            + L"An error occured while converting the string.");
+      try {
+        s = fromUTF8(received);
+      } catch (CodecException) {
+        delete[] utfStr; throw;
       }
-#endif
-
-        sizeRead = (size_t)lenRec;
-
-
+      sizeRead = (size_t)lenRec;
     }
     delete[] utfStr;
   }
@@ -394,3 +362,11 @@
     return true;
   }
 }
+
+/*
+ * Local Variables:
+ * c-file-style: "bsd"
+ * c-basic-offset: 2
+ * indent-tabs-mode: nil
+ * End:
+ */

_______________________________________________
Carob-commits mailing list
[email protected]
https://forge.continuent.org/mailman/listinfo/carob-commits

Reply via email to