Juan Zacarias has proposed merging lp:~zorba-coders/zorba/bug1073175 into 
lp:zorba with lp:~paul-lucas/zorba/feature-utf8_streambuf as a prerequisite.

Requested reviews:
  Sorin Marian Nasoi (sorin.marian.nasoi)
  Chris Hillery (ceejatec)
  Paul J. Lucas (paul-lucas)
Related bugs:
  Bug #1073175 in Zorba: "FOTS: fn:unparsed-text-lines test hangs"
  https://bugs.launchpad.net/zorba/+bug/1073175

For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/bug1073175/+merge/144762

Fix for bug 1073175. 
Changes to fn:unparsed-text, fn:unparsed-text-available and 
fn:unparsed-text-lines. 
They now throw the correct error when the input contains invalid UTF-8 
characters and no encoding was specified. fn:unparsed-text-available now 
returns the correct value in this case.

-- 
https://code.launchpad.net/~zorba-coders/zorba/bug1073175/+merge/144762
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/runtime/sequences/pregenerated/sequences.h'
--- src/runtime/sequences/pregenerated/sequences.h	2012-10-08 12:09:36 +0000
+++ src/runtime/sequences/pregenerated/sequences.h	2013-01-24 17:53:25 +0000
@@ -1429,6 +1429,7 @@
 public:
   std::unique_ptr<std::istream, StreamReleaser>* theStream; //the current stream
    internal::StreamResource* theStreamResource; //the current iterator
+  bool isEncoded; //Input is Encoded
 
   FnUnparsedTextLinesIteratorState();
 

=== modified file 'src/runtime/sequences/sequences_impl.cpp'
--- src/runtime/sequences/sequences_impl.cpp	2013-01-11 10:18:14 +0000
+++ src/runtime/sequences/sequences_impl.cpp	2013-01-24 17:53:25 +0000
@@ -31,6 +31,8 @@
 #include <zorba/util/time.h>
 
 #include <zorba/transcode_stream.h>
+#include <util/utf8_util_base.h>
+#include <util/utf8_streambuf.h>
 
 #include <util/fs_util.h>
 #include <util/uri_util.h>
@@ -1904,6 +1906,7 @@
   QueryLoc const& loc,
   store::Item_t& oResult)
 {
+  zstring lValidateString;
   //Normalize input to handle filesystem paths, etc.
   zstring lNormUri;
   normalizeInputUri(aUri, aSctx, loc, &lNormUri);
@@ -1942,6 +1945,34 @@
     }
     transcode::attach(*lStream.get(), aEncoding.c_str());
   }
+  else
+  { 
+    //Verify valid utf-8 on the stream
+    std::ios::iostate const orig_exceptions = lStream.get()->exceptions();
+    utf8::streambuf utf_buf(lStream.get()->rdbuf());
+    try
+    {
+      lStream.get()->ios::rdbuf(&utf_buf);
+      lStream.get()->ios::exceptions(orig_exceptions | ios::badbit);
+      
+      //Read streambuf to verify utf-8      
+      char buf[1024];
+      while (lStream.get()->good())
+      {
+        lStream.get()->read(buf, sizeof(buf));
+      }      
+
+      lStream.get()->ios::rdbuf(utf_buf.original());
+      lStream.get()->ios::exceptions(orig_exceptions);
+    }
+    catch (...)
+    {
+      lStream.get()->ios::rdbuf(utf_buf.original());
+      lStream.get()->ios::exceptions(orig_exceptions);
+      throw XQUERY_EXCEPTION(err::FOUT1190, ERROR_PARAMS(aUri), ERROR_LOC(loc));
+    }
+  }
+
   //creates stream item
   GENV_ITEMFACTORY->createStreamableString(
     oResult,
@@ -2046,6 +2077,7 @@
   zstring encodingString("UTF-8");
   zstring lNormUri;
   zstring lErrorMessage;
+  const char* isInvalid;
   std::auto_ptr<internal::Resource> lResource;
   StreamReleaser lStreamReleaser;
   std::auto_ptr<zorba::URI> lUri;
@@ -2091,8 +2123,10 @@
   state->theStreamResource->setStreamReleaser(nullptr);
 
   //check if encoding is needed
+ state->isEncoded = false;
   if (transcode::is_necessary(encodingString.c_str()))
   {
+    state->isEncoded = true;
     if (!transcode::is_supported(encodingString.c_str()))
     {
       throw XQUERY_EXCEPTION(err::FOUT1190, ERROR_PARAMS(uriString), ERROR_LOC(loc));
@@ -2103,8 +2137,16 @@
   while (state->theStream->get()->good())
   {
     getline(*state->theStream->get(), streamLine);
+    if(!state->isEncoded)
+    {
+      isInvalid = utf8::validate(streamLine.c_str()); 
+      if (isInvalid)
+      {
+        throw XQUERY_EXCEPTION(err::FOUT1190, ERROR_PARAMS(uriString), ERROR_LOC(loc));
+      }
+    }
     STACK_PUSH(GENV_ITEMFACTORY->createString(result, streamLine), state);
-  }
+  }  
 
   STACK_END(state);
 }

=== modified file 'src/runtime/spec/sequences/sequences.xml'
--- src/runtime/spec/sequences/sequences.xml	2012-10-15 13:35:59 +0000
+++ src/runtime/spec/sequences/sequences.xml	2013-01-24 17:53:25 +0000
@@ -1306,6 +1306,7 @@
                     brief="the current stream"/>
       <zorba:member type=" internal::StreamResource*" name="theStreamResource"
                   brief="the current iterator"/>
+      <zorba:member type="bool" name="isEncoded" brief="Input is Encoded"/>
     </zorba:state> 
     
     <zorba:methods>

=== modified file 'test/fots_driver/cli.xq'
--- test/fots_driver/cli.xq	2013-01-24 07:06:35 +0000
+++ test/fots_driver/cli.xq	2013-01-24 17:53:25 +0000
@@ -190,6 +190,7 @@
     ), "&#xA;")
 };
 
+<<<<<<< TREE
 
 (:~
   Tokenize a string that contains a comma-separated list of tokens.
@@ -210,6 +211,17 @@
 
 variable $testCasePrefixesMsg := "'testCasePrefixes' was set to: ";
 
+=======
+(:~ The test cases in this list have bugs assigned and should not be run :)
+variable $exceptedTestCases := (
+"cbcl-subsequence-011", "cbcl-subsequence-012", "cbcl-subsequence-013",
+"cbcl-subsequence-014"                                  (:see bug lp:1069794 :)
+, "re00975", "re00976", "re00976a"                      (:see bug lp:1070533 :)
+);
+
+(:~  The test in this list have bugs assigned already and should not be run :)
+variable $exceptedTestSets := ();
+>>>>>>> MERGE-SOURCE
 
 switch ($mode)
 

-- 
Mailing list: https://launchpad.net/~zorba-coders
Post to     : zorba-coders@lists.launchpad.net
Unsubscribe : https://launchpad.net/~zorba-coders
More help   : https://help.launchpad.net/ListHelp

Reply via email to