Matthias Brantner has proposed merging lp:~zorba-coders/zorba/feature-ft_module into lp:zorba.
Requested reviews: Paul J. Lucas (paul-lucas) Related bugs: Bug #944795 in Zorba: "XQDoc doesn't handle & in URLs" https://bugs.launchpad.net/zorba/+bug/944795 For more details, see: https://code.launchpad.net/~zorba-coders/zorba/feature-ft_module/+merge/104830 Optimized ft:tokenize: the generated token elements are no longer validated inside the iterator, and the creation of the QNames has been factored out so that they are created once per iterator instead of once per token. -- https://code.launchpad.net/~zorba-coders/zorba/feature-ft_module/+merge/104830 Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/runtime/full_text/ft_module_impl.cpp' --- src/runtime/full_text/ft_module_impl.cpp 2012-04-28 00:48:03 +0000 +++ src/runtime/full_text/ft_module_impl.cpp 2012-05-05 11:42:19 +0000 @@ -453,9 +453,48 @@ /////////////////////////////////////////////////////////////////////////////// +TokenizeIterator::TokenizeIterator( + static_context* sctx, + const QueryLoc& loc, + std::vector<PlanIter_t>& children) + : NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>(sctx, loc, children) +{ + initMembers(); +} + +void TokenizeIterator::serialize( ::zorba::serialization::Archiver& ar) +{ + serialize_baseclass(ar, + (NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>*)this); + if (!ar.is_serializing_out()) + { + initMembers(); + } +} + +void TokenizeIterator::initMembers() { + GENV_ITEMFACTORY->createQName( + token_qname_, static_context::ZORBA_FULL_TEXT_FN_NS, "", "token"); + + GENV_ITEMFACTORY->createQName( + lang_qname_, "", "", "lang"); + + GENV_ITEMFACTORY->createQName( + para_qname_, "", "", "paragraph"); + + GENV_ITEMFACTORY->createQName( + sent_qname_, "", "", "sentence"); + + GENV_ITEMFACTORY->createQName( + value_qname_, "", "", "value"); + + GENV_ITEMFACTORY->createQName( + ref_qname_, "", "", "node-ref"); +} + bool TokenizeIterator::nextImpl( store::Item_t &result, PlanState &plan_state ) const { - store::Item_t attr_name, attr_node; + store::Item_t node_name, attr_node; zstring base_uri; store::Item_t item; iso639_1::type lang; @@ -488,69 +527,60 @@ token = state->doc_tokens_->next(); ZORBA_ASSERT( token ); - if ( state->token_qname_.isNull() ) - GENV_ITEMFACTORY->createQName( - state->token_qname_, static_context::ZORBA_FULL_TEXT_FN_NS, "", - "token" - ); - base_uri = static_context::ZORBA_FULL_TEXT_FN_NS; type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME; + node_name = token_qname_; GENV_ITEMFACTORY->createElementNode( - result, nullptr, state->token_qname_, type_name, false, false, + result, nullptr, node_name, type_name, false, 
false, ns_bindings, base_uri ); if ( token->lang() ) { value_string = iso639_1::string_of[ token->lang() ]; - GENV_ITEMFACTORY->createQName( attr_name, "", "", "lang" ); GENV_ITEMFACTORY->createString( item, value_string ); type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME; + node_name = lang_qname_; GENV_ITEMFACTORY->createAttributeNode( - attr_node, result, attr_name, type_name, item + attr_node, result, node_name, type_name, item ); } ztd::to_string( token->para(), &value_string ); - GENV_ITEMFACTORY->createQName( attr_name, "", "", "paragraph" ); GENV_ITEMFACTORY->createString( item, value_string ); type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME; + node_name = para_qname_; GENV_ITEMFACTORY->createAttributeNode( - attr_node, result, attr_name, type_name, item + attr_node, result, node_name, type_name, item ); ztd::to_string( token->sent(), &value_string ); - GENV_ITEMFACTORY->createQName( attr_name, "", "", "sentence" ); GENV_ITEMFACTORY->createString( item, value_string ); type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME; + node_name = sent_qname_; GENV_ITEMFACTORY->createAttributeNode( - attr_node, result, attr_name, type_name, item + attr_node, result, node_name, type_name, item ); value_string = token->value(); - GENV_ITEMFACTORY->createQName( attr_name, "", "", "value" ); GENV_ITEMFACTORY->createString( item, value_string ); type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME; + node_name = value_qname_; GENV_ITEMFACTORY->createAttributeNode( - attr_node, result, attr_name, type_name, item + attr_node, result, node_name, type_name, item ); if ( store::Item const *const token_item = token->item() ) { if ( GENV_STORE.getNodeReference( item, token_item ) ) { item->getStringValue2( value_string ); - GENV_ITEMFACTORY->createQName( attr_name, "", "", "node-ref" ); GENV_ITEMFACTORY->createString( item, value_string ); type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME; + node_name = ref_qname_; GENV_ITEMFACTORY->createAttributeNode( - attr_node, result, attr_name, type_name, item + 
attr_node, result, node_name, type_name, item ); } } -#ifndef ZORBA_NO_XMLSCHEMA - sctx->validate( result, result, StaticContextConsts::strict_validation ); -#endif /* ZORBA_NO_XMLSCHEMA */ - STACK_PUSH( true, state ); } // while } === modified file 'src/runtime/full_text/pregenerated/ft_module.h' --- src/runtime/full_text/pregenerated/ft_module.h 2012-04-20 20:41:53 +0000 +++ src/runtime/full_text/pregenerated/ft_module.h 2012-05-05 11:42:19 +0000 @@ -424,7 +424,6 @@ public: store::Item_t doc_item_; // FTTokenIterator_t doc_tokens_; // - store::Item_t token_qname_; // TokenizeIteratorState(); @@ -435,28 +434,31 @@ class TokenizeIterator : public NaryBaseIterator<TokenizeIterator, TokenizeIteratorState> { +protected: + store::Item_t token_qname_; // + store::Item_t lang_qname_; // + store::Item_t para_qname_; // + store::Item_t sent_qname_; // + store::Item_t value_qname_; // + store::Item_t ref_qname_; // public: SERIALIZABLE_CLASS(TokenizeIterator); SERIALIZABLE_CLASS_CONSTRUCTOR2T(TokenizeIterator, NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>); - void serialize( ::zorba::serialization::Archiver& ar) - { - serialize_baseclass(ar, - (NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>*)this); - } + void serialize( ::zorba::serialization::Archiver& ar); TokenizeIterator( static_context* sctx, const QueryLoc& loc, std::vector<PlanIter_t>& children) - : - NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>(sctx, loc, children) - {} + ; virtual ~TokenizeIterator(); +public: + void initMembers(); void accept(PlanIterVisitor& v) const; bool nextImpl(store::Item_t& result, PlanState& aPlanState) const; === modified file 'src/runtime/spec/full_text/ft_module.xml' --- src/runtime/spec/full_text/ft_module.xml 2012-04-26 13:25:13 +0000 +++ src/runtime/spec/full_text/ft_module.xml 2012-05-05 11:42:19 +0000 @@ -165,14 +165,24 @@ <zorba:iterator name="TokenizeIterator" generateResetImpl="true" + generateSerialize="false" + generateConstructor="false" 
preprocessorGuard="#ifndef ZORBA_NO_FULL_TEXT"> <zorba:state generateInit="use-default"> <zorba:member type="store::Item_t" name="doc_item_"/> <zorba:member type="FTTokenIterator_t" name="doc_tokens_"/> - <zorba:member type="store::Item_t" name="token_qname_"/> </zorba:state> + <zorba:member type="store::Item_t" name="token_qname_"/> + <zorba:member type="store::Item_t" name="lang_qname_"/> + <zorba:member type="store::Item_t" name="para_qname_"/> + <zorba:member type="store::Item_t" name="sent_qname_"/> + <zorba:member type="store::Item_t" name="value_qname_"/> + <zorba:member type="store::Item_t" name="ref_qname_"/> + + <zorba:method name="initMembers" return="void"/> + </zorba:iterator> <zorba:iterator name="TokenizerPropertiesIterator" === modified file 'src/runtime/spec/iterator_h.xq' --- src/runtime/spec/iterator_h.xq 2012-05-03 12:31:51 +0000 +++ src/runtime/spec/iterator_h.xq 2012-05-05 11:42:19 +0000 @@ -153,16 +153,21 @@ local:children-decl($iter), local:add-constructor-param($iter), ')', - $gen:newline, gen:indent(2), ': ', - $gen:newline, gen:indent(2), $base, '(sctx, loc', local:children-args($iter), - if ($iter/@base) - then concat(', ', - string-join( - for $base-param in $iter/zorba:constructor/zorba:parameter[@base = "true"] - return $base-param/@name, ', ')) - else "", - local:add-constructor-param-2($iter), - $gen:newline, gen:indent(1), '{}', + $gen:newline, gen:indent(2), + if (not(exists($iter/@generateConstructor)) or $iter/@generateConstructor = "true") + then concat( + ': ', + $gen:newline, gen:indent(2), $base, '(sctx, loc', local:children-args($iter), + if ($iter/@base) + then concat(', ', + string-join( + for $base-param in $iter/zorba:constructor/zorba:parameter[@base = "true"] + return $base-param/@name, ', ')) + else "", + local:add-constructor-param-2($iter), + $gen:newline, gen:indent(1), '{}') + else + ';', $gen:newline, $gen:newline ) }; === modified file 'test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-2.xq' --- 
test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-2.xq 2012-03-08 18:46:22 +0000 +++ test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-2.xq 2012-05-05 11:42:19 +0000 @@ -1,9 +1,11 @@ import module namespace ft = "http://www.zorba-xquery.com/modules/full-text"; +import schema namespace fts = "http://www.zorba-xquery.com/modules/full-text"; + let $doc := <msg xml:lang="es">hola, mundo</msg> let $tokens := ft:tokenize( $doc ) -let $t1 := $tokens[1] -let $t2 := $tokens[2] +let $t1 := validate { $tokens[1] } +let $t2 := validate { $tokens[2] } return $t1/@value = "hola" and $t1/@lang = "es"
-- Mailing list: https://launchpad.net/~zorba-coders Post to : zorba-coders@lists.launchpad.net Unsubscribe : https://launchpad.net/~zorba-coders More help : https://help.launchpad.net/ListHelp