Juan Zacarias has proposed merging lp:~zorba-coders/zorba/bug1073175 into lp:zorba with lp:~paul-lucas/zorba/feature-utf8_streambuf as a prerequisite.
Requested reviews: Sorin Marian Nasoi (sorin.marian.nasoi) Chris Hillery (ceejatec) Paul J. Lucas (paul-lucas) Related bugs: Bug #1073175 in Zorba: "FOTS: fn:unparsed-text-lines test hangs" https://bugs.launchpad.net/zorba/+bug/1073175 For more details, see: https://code.launchpad.net/~zorba-coders/zorba/bug1073175/+merge/144762 Fix for bug 1073175. Changes to fn:unparsed-text, fn:unparsed-text-available and fn:unparsed-text-lines. They now throw the correct error when the input contains invalid UTF-8 byte sequences and no encoding was specified. fn:unparsed-text-available now returns the correct value in this case. -- https://code.launchpad.net/~zorba-coders/zorba/bug1073175/+merge/144762 Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/runtime/sequences/pregenerated/sequences.h' --- src/runtime/sequences/pregenerated/sequences.h 2012-10-08 12:09:36 +0000 +++ src/runtime/sequences/pregenerated/sequences.h 2013-01-24 17:53:25 +0000 @@ -1429,6 +1429,7 @@ public: std::unique_ptr<std::istream, StreamReleaser>* theStream; //the current stream internal::StreamResource* theStreamResource; //the current iterator + bool isEncoded; //Input is Encoded FnUnparsedTextLinesIteratorState(); === modified file 'src/runtime/sequences/sequences_impl.cpp' --- src/runtime/sequences/sequences_impl.cpp 2013-01-11 10:18:14 +0000 +++ src/runtime/sequences/sequences_impl.cpp 2013-01-24 17:53:25 +0000 @@ -31,6 +31,8 @@ #include <zorba/util/time.h> #include <zorba/transcode_stream.h> +#include <util/utf8_util_base.h> +#include <util/utf8_streambuf.h> #include <util/fs_util.h> #include <util/uri_util.h> @@ -1904,6 +1906,7 @@ QueryLoc const& loc, store::Item_t& oResult) { + zstring lValidateString; //Normalize input to handle filesystem paths, etc. zstring lNormUri; normalizeInputUri(aUri, aSctx, loc, &lNormUri); @@ -1942,6 +1945,34 @@ } transcode::attach(*lStream.get(), aEncoding.c_str()); } + else + { + //Verify valid utf-8 on the stream + std::ios::iostate const orig_exceptions = lStream.get()->exceptions(); + utf8::streambuf utf_buf(lStream.get()->rdbuf()); + try + { + lStream.get()->ios::rdbuf(&utf_buf); + lStream.get()->ios::exceptions(orig_exceptions | ios::badbit); + + //Read streambuf to verify utf-8 + char buf[1024]; + while (lStream.get()->good()) + { + lStream.get()->read(buf, sizeof(buf)); + } + + lStream.get()->ios::rdbuf(utf_buf.original()); + lStream.get()->ios::exceptions(orig_exceptions); + } + catch (...) 
+ { + lStream.get()->ios::rdbuf(utf_buf.original()); + lStream.get()->ios::exceptions(orig_exceptions); + throw XQUERY_EXCEPTION(err::FOUT1190, ERROR_PARAMS(aUri), ERROR_LOC(loc)); + } + } + //creates stream item GENV_ITEMFACTORY->createStreamableString( oResult, @@ -2046,6 +2077,7 @@ zstring encodingString("UTF-8"); zstring lNormUri; zstring lErrorMessage; + const char* isInvalid; std::auto_ptr<internal::Resource> lResource; StreamReleaser lStreamReleaser; std::auto_ptr<zorba::URI> lUri; @@ -2091,8 +2123,10 @@ state->theStreamResource->setStreamReleaser(nullptr); //check if encoding is needed + state->isEncoded = false; if (transcode::is_necessary(encodingString.c_str())) { + state->isEncoded = true; if (!transcode::is_supported(encodingString.c_str())) { throw XQUERY_EXCEPTION(err::FOUT1190, ERROR_PARAMS(uriString), ERROR_LOC(loc)); @@ -2103,8 +2137,16 @@ while (state->theStream->get()->good()) { getline(*state->theStream->get(), streamLine); + if(!state->isEncoded) + { + isInvalid = utf8::validate(streamLine.c_str()); + if (isInvalid) + { + throw XQUERY_EXCEPTION(err::FOUT1190, ERROR_PARAMS(uriString), ERROR_LOC(loc)); + } + } STACK_PUSH(GENV_ITEMFACTORY->createString(result, streamLine), state); - } + } STACK_END(state); } === modified file 'src/runtime/spec/sequences/sequences.xml' --- src/runtime/spec/sequences/sequences.xml 2012-10-15 13:35:59 +0000 +++ src/runtime/spec/sequences/sequences.xml 2013-01-24 17:53:25 +0000 @@ -1306,6 +1306,7 @@ brief="the current stream"/> <zorba:member type=" internal::StreamResource*" name="theStreamResource" brief="the current iterator"/> + <zorba:member type="bool" name="isEncoded" brief="Input is Encoded"/> </zorba:state> <zorba:methods> === modified file 'test/fots_driver/cli.xq' --- test/fots_driver/cli.xq 2013-01-24 07:06:35 +0000 +++ test/fots_driver/cli.xq 2013-01-24 17:53:25 +0000 @@ -190,6 +190,7 @@ ), "
") }; +<<<<<<< TREE (:~ Tokenize a string that contains a comma-separated list of tokens. @@ -210,6 +211,17 @@ variable $testCasePrefixesMsg := "'testCasePrefixes' was set to: "; +======= +(:~ The test cases in this list have bugs assigned and should not be run :) +variable $exceptedTestCases := ( +"cbcl-subsequence-011", "cbcl-subsequence-012", "cbcl-subsequence-013", +"cbcl-subsequence-014" (:see bug lp:1069794 :) +, "re00975", "re00976", "re00976a" (:see bug lp:1070533 :) +); + +(:~ The test in this list have bugs assigned already and should not be run :) +variable $exceptedTestSets := (); +>>>>>>> MERGE-SOURCE switch ($mode)
-- Mailing list: https://launchpad.net/~zorba-coders Post to : zorba-coders@lists.launchpad.net Unsubscribe : https://launchpad.net/~zorba-coders More help : https://help.launchpad.net/ListHelp