Matthias Brantner has proposed merging lp:~zorba-coders/zorba/feature-ft_module 
into lp:zorba.

Requested reviews:
  Matthias Brantner (matthias-brantner)
  Paul J. Lucas (paul-lucas)
Related bugs:
  Bug #944795 in Zorba: "XQDoc doesn't handle & in URLs"
  https://bugs.launchpad.net/zorba/+bug/944795

For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/feature-ft_module/+merge/104841

Optimized ft:tokenize: the generated tokens are no longer schema-validated, and the QNames are now created once up front (in initMembers) instead of for every token.
-- 
https://code.launchpad.net/~zorba-coders/zorba/feature-ft_module/+merge/104841
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/runtime/full_text/ft_module_impl.cpp'
--- src/runtime/full_text/ft_module_impl.cpp	2012-04-28 00:48:03 +0000
+++ src/runtime/full_text/ft_module_impl.cpp	2012-05-05 16:30:27 +0000
@@ -453,9 +453,48 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 
+TokenizeIterator::TokenizeIterator(
+  static_context* sctx,
+  const QueryLoc& loc,
+  std::vector<PlanIter_t>& children)
+  : NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>(sctx, loc, children)
+{
+  initMembers();
+}
+
+void TokenizeIterator::serialize( ::zorba::serialization::Archiver& ar)
+{
+  serialize_baseclass(ar,
+     (NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>*)this);
+  if (!ar.is_serializing_out())
+  {
+    initMembers();
+  }
+}
+
+void TokenizeIterator::initMembers() {
+  GENV_ITEMFACTORY->createQName(
+    token_qname_, static_context::ZORBA_FULL_TEXT_FN_NS, "", "token");
+
+  GENV_ITEMFACTORY->createQName(
+    lang_qname_, "", "", "lang");
+
+  GENV_ITEMFACTORY->createQName(
+    para_qname_, "", "", "paragraph");
+
+  GENV_ITEMFACTORY->createQName(
+    sent_qname_, "", "", "sentence");
+
+  GENV_ITEMFACTORY->createQName(
+    value_qname_, "", "", "value");
+
+  GENV_ITEMFACTORY->createQName(
+    ref_qname_, "", "", "node-ref");
+}
+
 bool TokenizeIterator::nextImpl( store::Item_t &result,
                                  PlanState &plan_state ) const {
-  store::Item_t attr_name, attr_node;
+  store::Item_t node_name, attr_node;
   zstring base_uri;
   store::Item_t item;
   iso639_1::type lang;
@@ -488,69 +527,60 @@
       token = state->doc_tokens_->next();
       ZORBA_ASSERT( token );
 
-      if ( state->token_qname_.isNull() )
-        GENV_ITEMFACTORY->createQName(
-          state->token_qname_, static_context::ZORBA_FULL_TEXT_FN_NS, "",
-          "token"
-        );
-
       base_uri = static_context::ZORBA_FULL_TEXT_FN_NS;
       type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME;
+      node_name = token_qname_;
       GENV_ITEMFACTORY->createElementNode(
-        result, nullptr, state->token_qname_, type_name, false, false,
+        result, nullptr, node_name, type_name, false, false,
         ns_bindings, base_uri
       );
 
       if ( token->lang() ) {
         value_string = iso639_1::string_of[ token->lang() ];
-        GENV_ITEMFACTORY->createQName( attr_name, "", "", "lang" );
         GENV_ITEMFACTORY->createString( item, value_string );
         type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME;
+        node_name = lang_qname_;
         GENV_ITEMFACTORY->createAttributeNode(
-          attr_node, result, attr_name, type_name, item
+          attr_node, result, node_name, type_name, item
         );
       }
 
       ztd::to_string( token->para(), &value_string );
-      GENV_ITEMFACTORY->createQName( attr_name, "", "", "paragraph" );
       GENV_ITEMFACTORY->createString( item, value_string );
       type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME;
+      node_name = para_qname_;
       GENV_ITEMFACTORY->createAttributeNode(
-        attr_node, result, attr_name, type_name, item
+        attr_node, result, node_name, type_name, item
       );
 
       ztd::to_string( token->sent(), &value_string );
-      GENV_ITEMFACTORY->createQName( attr_name, "", "", "sentence" );
       GENV_ITEMFACTORY->createString( item, value_string );
       type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME;
+      node_name = sent_qname_;
       GENV_ITEMFACTORY->createAttributeNode(
-        attr_node, result, attr_name, type_name, item
+        attr_node, result, node_name, type_name, item
       );
 
       value_string = token->value();
-      GENV_ITEMFACTORY->createQName( attr_name, "", "", "value" );
       GENV_ITEMFACTORY->createString( item, value_string );
       type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME;
+      node_name = value_qname_;
       GENV_ITEMFACTORY->createAttributeNode(
-        attr_node, result, attr_name, type_name, item
+        attr_node, result, node_name, type_name, item
       );
 
       if ( store::Item const *const token_item = token->item() ) {
         if ( GENV_STORE.getNodeReference( item, token_item ) ) {
           item->getStringValue2( value_string );
-          GENV_ITEMFACTORY->createQName( attr_name, "", "", "node-ref" );
           GENV_ITEMFACTORY->createString( item, value_string );
           type_name = GENV_TYPESYSTEM.XS_UNTYPED_QNAME;
+          node_name = ref_qname_;
           GENV_ITEMFACTORY->createAttributeNode(
-            attr_node, result, attr_name, type_name, item
+            attr_node, result, node_name, type_name, item
           );
         }
       }
 
-#ifndef ZORBA_NO_XMLSCHEMA
-      sctx->validate( result, result, StaticContextConsts::strict_validation );
-#endif /* ZORBA_NO_XMLSCHEMA */
-
       STACK_PUSH( true, state );
     } // while
   }

=== modified file 'src/runtime/full_text/pregenerated/ft_module.h'
--- src/runtime/full_text/pregenerated/ft_module.h	2012-04-20 20:41:53 +0000
+++ src/runtime/full_text/pregenerated/ft_module.h	2012-05-05 16:30:27 +0000
@@ -424,7 +424,6 @@
 public:
   store::Item_t doc_item_; //
   FTTokenIterator_t doc_tokens_; //
-  store::Item_t token_qname_; //
 
   TokenizeIteratorState();
 
@@ -435,28 +434,31 @@
 
 class TokenizeIterator : public NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>
 { 
+protected:
+  store::Item_t token_qname_; //
+  store::Item_t lang_qname_; //
+  store::Item_t para_qname_; //
+  store::Item_t sent_qname_; //
+  store::Item_t value_qname_; //
+  store::Item_t ref_qname_; //
 public:
   SERIALIZABLE_CLASS(TokenizeIterator);
 
   SERIALIZABLE_CLASS_CONSTRUCTOR2T(TokenizeIterator,
     NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>);
 
-  void serialize( ::zorba::serialization::Archiver& ar)
-  {
-    serialize_baseclass(ar,
-    (NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>*)this);
-  }
+  void serialize( ::zorba::serialization::Archiver& ar);
 
   TokenizeIterator(
     static_context* sctx,
     const QueryLoc& loc,
     std::vector<PlanIter_t>& children)
-    : 
-    NaryBaseIterator<TokenizeIterator, TokenizeIteratorState>(sctx, loc, children)
-  {}
+    ;
 
   virtual ~TokenizeIterator();
 
+public:
+  void initMembers();
   void accept(PlanIterVisitor& v) const;
 
   bool nextImpl(store::Item_t& result, PlanState& aPlanState) const;

=== modified file 'src/runtime/spec/full_text/ft_module.xml'
--- src/runtime/spec/full_text/ft_module.xml	2012-04-26 13:25:13 +0000
+++ src/runtime/spec/full_text/ft_module.xml	2012-05-05 16:30:27 +0000
@@ -165,14 +165,24 @@
 
 <zorba:iterator name="TokenizeIterator"
                 generateResetImpl="true"
+                generateSerialize="false"
+                generateConstructor="false"
                 preprocessorGuard="#ifndef ZORBA_NO_FULL_TEXT">
 
   <zorba:state generateInit="use-default">
     <zorba:member type="store::Item_t" name="doc_item_"/>
     <zorba:member type="FTTokenIterator_t" name="doc_tokens_"/>
-    <zorba:member type="store::Item_t" name="token_qname_"/>
   </zorba:state>
 
+  <zorba:member type="store::Item_t" name="token_qname_"/>
+  <zorba:member type="store::Item_t" name="lang_qname_"/>
+  <zorba:member type="store::Item_t" name="para_qname_"/>
+  <zorba:member type="store::Item_t" name="sent_qname_"/>
+  <zorba:member type="store::Item_t" name="value_qname_"/>
+  <zorba:member type="store::Item_t" name="ref_qname_"/>
+
+  <zorba:method name="initMembers" return="void"/>
+
 </zorba:iterator>
 
 <zorba:iterator name="TokenizerPropertiesIterator"

=== modified file 'src/runtime/spec/iterator_h.xq'
--- src/runtime/spec/iterator_h.xq	2012-05-03 12:31:51 +0000
+++ src/runtime/spec/iterator_h.xq	2012-05-05 16:30:27 +0000
@@ -153,16 +153,21 @@
     local:children-decl($iter),
     local:add-constructor-param($iter),
     ')',
-    $gen:newline, gen:indent(2), ': ',
-    $gen:newline, gen:indent(2), $base, '(sctx, loc', local:children-args($iter),
-    if ($iter/@base)
-    then concat(', ',
-            string-join(
-              for $base-param in $iter/zorba:constructor/zorba:parameter[@base = "true"]
-              return $base-param/@name, ', '))
-    else "",
-    local:add-constructor-param-2($iter),
-    $gen:newline, gen:indent(1), '{}',
+    $gen:newline, gen:indent(2),
+    if (not(exists($iter/@generateConstructor)) or $iter/@generateConstructor = "true")
+    then concat(
+      ': ',
+      $gen:newline, gen:indent(2), $base, '(sctx, loc', local:children-args($iter),
+      if ($iter/@base)
+      then concat(', ',
+              string-join(
+                for $base-param in $iter/zorba:constructor/zorba:parameter[@base = "true"]
+                return $base-param/@name, ', '))
+      else "",
+      local:add-constructor-param-2($iter),
+      $gen:newline, gen:indent(1), '{}')
+    else
+      ';',
     $gen:newline, $gen:newline
   )
 };

=== modified file 'test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-2.xq'
--- test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-2.xq	2012-03-08 18:46:22 +0000
+++ test/rbkt/Queries/zorba/fulltext/ft-module-tokenize-2.xq	2012-05-05 16:30:27 +0000
@@ -1,9 +1,11 @@
 import module namespace ft = "http://www.zorba-xquery.com/modules/full-text";;
 
+import schema namespace fts = "http://www.zorba-xquery.com/modules/full-text";;
+
 let $doc := <msg xml:lang="es">hola, mundo</msg>
 let $tokens := ft:tokenize( $doc )
-let $t1 := $tokens[1]
-let $t2 := $tokens[2]
+let $t1 := validate { $tokens[1] }
+let $t2 := validate { $tokens[2] }
 
 return  $t1/@value = "hola"
     and $t1/@lang = "es"

-- 
Mailing list: https://launchpad.net/~zorba-coders
Post to     : zorba-coders@lists.launchpad.net
Unsubscribe : https://launchpad.net/~zorba-coders
More help   : https://help.launchpad.net/ListHelp

Reply via email to