[MarkLogic Dev General] Target ! cts:atribute-word-query !! Was RE: xml:lang - refinement step 1 - RE: Indexing strategy for attributes when using xdmp:xlst-invoke

David Lee Thu, 25 Jun 2015 09:23:21 -0700

A bit more narrowing - 
This shows the issue is in cts:element-attribute-word-query only , and 
unrelated to xslt or search:search, or element-word-query.






--------------------------------------------------

First change the source to include child text to test:

----------
xquery version "1.0-ml";
(: Reset step: walk every document returned by doc() and delete it by URI.
   The trailing semicolon ends this statement so another one can follow. :)
for $existing in doc()
return xdmp:document-delete(fn:document-uri($existing));

xquery version "1.0-ml";
declare namespace html = "http://www.w3.org/1999/xhtml";
import module namespace search = "http://marklogic.com/appservices/search"
    at "/MarkLogic/appservices/search/search.xqy";

(: Creates the two source documents used by the tests.
   Each has a single <bar> child carrying xml:lang="de", a "type" attribute
   and child text; the two documents differ in the attribute order on <bar>,
   in the type value (abc / def) and in the text.
   Returns a confirmation string once both inserts are issued. :)
let $content1 :=
<foo-abc>
   <bar type="abc" xml:lang="de">Text abc</bar>
</foo-abc>

let $content2 :=
<foo-def>
   <bar xml:lang="de" type="def">Text def</bar>
</foo-def>

let $_ := xdmp:document-insert("/test/foo1",$content1)
let $_ := xdmp:document-insert("/test/foo2",$content2)
return "inserted documents 1 and 2"

-------------------

<-- some other tests can be supplied on request -->

------------
Add an XQuery version of the same thing as xslt 

----------------


xquery version "1.0-ml";
(: Prolog for the copy script. The html prefix is declared here but is not
   referenced by the code shown in this snippet. :)
declare namespace html = "http://www.w3.org/1999/xhtml";



(:~
 : Rebuilds an element through a computed constructor.
 :
 : The new element keeps the name of the input, receives all of its
 : attributes, and then its child nodes in order: child elements are
 : rebuilt recursively, every other kind of child node is passed through
 : unchanged.
 :
 : @param $element the element to rebuild
 : @return a newly constructed element with the same name, attributes and content
 :)
declare function local:copy($element as element()) as element() {
   element { node-name($element) } {
      $element/@*,
      for $kid in $element/node()
      return
         typeswitch ($kid)
            case element() return local:copy($kid)
            default return $kid
   }
};

(: Pass the content of /test/foo1 and /test/foo2 through local:copy, wrap each
   result in a new document node and store it under /test/foo5 and /test/foo6.
   Returns a confirmation string once both inserts are issued. :)
let $content5 := document { local:copy( fn:doc("/test/foo1")/node() )}
let $content6 := document { local:copy( fn:doc("/test/foo2")/node() )}
let $_ := xdmp:document-insert("/test/foo5",$content5)
let $_ := xdmp:document-insert("/test/foo6",$content6)
return "inserted documents 5 and 6"
----------

Strip out the search:search layer and go right to cts:search using  the 
original attribute value search:
----------------


xquery version "1.0-ml";
(: Prolog for the cts:search test. The html prefix is declared here but is not
   referenced by the code shown in this snippet. :)
declare namespace html = "http://www.w3.org/1999/xhtml";

(:~
 : Builds the attribute word query under test: it targets the "type"
 : attribute of "bar" elements and passes a "lang=..." option built from
 : the requested language.
 :
 : @param $lang language code appended to the "lang=" option
 : @param $word the word to look for in the attribute value
 : @return the result of cts:element-attribute-word-query
 :)
declare function local:query( $lang  , $word  ) {
   let $lang-option := fn:concat("lang=", $lang)
   return
      cts:element-attribute-word-query(
         xs:QName("bar"),
         xs:QName("type"),
         $word,
         $lang-option
      )
};


<all>{
   (: Run one search per language/word pair (language is the outer loop) and
      wrap the matching documents, labelled by URI, in a <results> element. :)
   for $lang in ("en", "de"),
       $word in ("abc", "def", "Text")
   let $q := local:query($lang, $word)
   return
      <results word="{$word}" lang="{$lang}">{
         for $hit in cts:search(doc(), $q)
         return <result uri="{fn:document-uri($hit)}">{$hit}</result>
      }</results>
}</all>
---------------  Results before a reindex


<all>
  <results word="abc" lang="en">
    <result uri="/test/foo3">
      <foo-abc>
        <bar type="abc" xml:lang="de">Text abc</bar>
      </foo-abc>
    </result>
    <result uri="/test/foo5">
      <foo-abc>
        <bar type="abc" xml:lang="de">Text abc</bar>
      </foo-abc>
    </result>
  </results>
  <results word="def" lang="en"/>
  <results word="Text" lang="en"/>
  <results word="abc" lang="de">
    <result uri="/test/foo1">
      <foo-abc>
        <bar type="abc" xml:lang="de">Text abc</bar>
      </foo-abc>
    </result>
  </results>
  <results word="def" lang="de">
    <result uri="/test/foo2">
      <foo-def>
        <bar xml:lang="de" type="def">Text def</bar>
      </foo-def>
    </result>
    <result uri="/test/foo4">
      <foo-def>
        <bar xml:lang="de" type="def">Text def</bar>
      </foo-def>
    </result>
    <result uri="/test/foo6">
      <foo-def>
        <bar xml:lang="de" type="def">Text def</bar>
      </foo-def>
    </result>
  </results>
  <results word="Text" lang="de"/>
</all>


-------  Now try with an element-word-query instead of attribute 
<all>
  <results word="abc" lang="en"/>
  <results word="def" lang="en"/>
  <results word="Text" lang="en"/>
  <results word="abc" lang="de">
    <result uri="/test/foo1">
      <foo-abc>
        <bar type="abc" xml:lang="de">Text abc</bar>
      </foo-abc>
    </result>
    <result uri="/test/foo3">
      <foo-abc>
        <bar type="abc" xml:lang="de">Text abc</bar>
      </foo-abc>
    </result>
    <result uri="/test/foo5">
      <foo-abc>
        <bar type="abc" xml:lang="de">Text abc</bar>
      </foo-abc>
    </result>
  </results>
  <results word="def" lang="de">
    <result uri="/test/foo2">
      <foo-def>
        <bar xml:lang="de" type="def">Text def</bar>
      </foo-def>
    </result>
    <result uri="/test/foo4">
      <foo-def>
        <bar xml:lang="de" type="def">Text def</bar>
      </foo-def>
    </result>
    <result uri="/test/foo6">
      <foo-def>
        <bar xml:lang="de" type="def">Text def</bar>
      </foo-def>
    </result>
  </results>
  <results word="Text" lang="de">
    <result uri="/test/foo1">
      <foo-abc>
        <bar type="abc" xml:lang="de">Text abc</bar>
      </foo-abc>
    </result>
    <result uri="/test/foo2">
      <foo-def>
        <bar xml:lang="de" type="def">Text def</bar>
      </foo-def>
    </result>
    <result uri="/test/foo3">
      <foo-abc>
        <bar type="abc" xml:lang="de">Text abc</bar>
      </foo-abc>
    </result>
    <result uri="/test/foo4">
      <foo-def>
        <bar xml:lang="de" type="def">Text def</bar>
      </foo-def>
    </result>
    <result uri="/test/foo5">
      <foo-abc>
        <bar type="abc" xml:lang="de">Text abc</bar>
      </foo-abc>
    </result>
    <result uri="/test/foo6">
      <foo-def>
        <bar xml:lang="de" type="def">Text def</bar>
      </foo-def>
    </result>
  </results>
</all>


-----------------------------------------------------------------------------
David Lee
Lead Engineer
MarkLogic Corporation
[email protected]
Phone: +1 812-482-5224
Cell:  +1 812-630-7622
www.marklogic.com

-----Original Message-----
From: [email protected] 
[mailto:[email protected]] On Behalf Of David Lee
Sent: Thursday, June 25, 2015 11:01 AM
To: MarkLogic Developer Discussion
Subject: [MarkLogic Dev General] xml:lang - refinement step 1 - RE: Indexing 
strategy for attributes when using xdmp:xlst-invoke

To make concrete progress - I tried "step 1" - isolate XML semantics from 
search/index behavior.

I reproduced the original bug exactly (in 8.0.3).
Then cleared the DB and did the same thing but instead of cts:search used this 
pure XQuery statement to see whether everything is right at the XML, XDM and 
XQuery level.
Looks so:
(only non-standard is xdmp:path() which is just to help clarify what is being 
tested )
Note: as per W3C specs the element, and attribute are correctly identified with 
the right language. (no  text content yet)

Next step - see if search works as documented - to tell if the problem is a 
docs bug, a code bug or an undefined-needs-clarification issue.


----------

(: For every document, report the language seen by a fixed set of nodes:
   the root element(s), each bar element, each bar/@type attribute and each
   text node child of bar. Each node is identified by xdmp:path, shown with
   the xml:lang value(s) found on its ancestor-or-self elements, and tested
   with fn:lang against "en" and "de". :)
for $doc in doc()
let $targets := ($doc/element(), $doc//bar, $doc//bar/@type, $doc//bar/text())
return
   <doc uri="{fn:document-uri($doc)}">{
      for $node in $targets
      return
         <lang-for path="{xdmp:path($node)}" lang="{$node/ancestor-or-self::*/@xml:lang}">{
            for $code in ("en", "de")
            return <is-lang lang="{$code}">{fn:lang($code, $node)}</is-lang>
         }</lang-for>
   }</doc>
-----------------------
Result:

element 
<doc uri="/test/foo1">
<lang-for path="/foo" lang="">
<is-lang lang="en">false</is-lang>
<is-lang lang="de">false</is-lang>
</lang-for>
<lang-for path="/foo/bar" lang="de">
<is-lang lang="en">false</is-lang>
<is-lang lang="de">true</is-lang>
</lang-for>
<lang-for path="/foo/bar/@type" lang="de">
<is-lang lang="en">false</is-lang>
<is-lang lang="de">true</is-lang>
</lang-for>
</doc>
element 
<doc uri="/test/foo2">
<lang-for path="/foo" lang="">
<is-lang lang="en">false</is-lang>
<is-lang lang="de">false</is-lang>
</lang-for>
<lang-for path="/foo/bar" lang="de">
<is-lang lang="en">false</is-lang>
<is-lang lang="de">true</is-lang>
</lang-for>
<lang-for path="/foo/bar/@type" lang="de">
<is-lang lang="en">false</is-lang>
<is-lang lang="de">true</is-lang>
</lang-for>
</doc>
element 
<doc uri="/test/foo3">
<lang-for path="/foo" lang="">
<is-lang lang="en">false</is-lang>
<is-lang lang="de">false</is-lang>
</lang-for>
<lang-for path="/foo/bar" lang="de">
<is-lang lang="en">false</is-lang>
<is-lang lang="de">true</is-lang>
</lang-for>
<lang-for path="/foo/bar/@type" lang="de">
<is-lang lang="en">false</is-lang>
<is-lang lang="de">true</is-lang>
</lang-for>
</doc>
element 
<doc uri="/test/foo4">
<lang-for path="/foo" lang="">
<is-lang lang="en">false</is-lang>
<is-lang lang="de">false</is-lang>
</lang-for>
<lang-for path="/foo/bar" lang="de">
<is-lang lang="en">false</is-lang>
<is-lang lang="de">true</is-lang>
</lang-for>
<lang-for path="/foo/bar/@type" lang="de">
<is-lang lang="en">false</is-lang>
<is-lang lang="de">true</is-lang>
</lang-for>
</doc>

-----------------------------------------------------------------------------
David Lee
Lead Engineer
MarkLogic Corporation
[email protected]
Phone: +1 812-482-5224
Cell:  +1 812-630-7622
www.marklogic.com

-----Original Message-----
From: [email protected] 
[mailto:[email protected]] On Behalf Of David Lee
Sent: Thursday, June 25, 2015 9:24 AM
To: MarkLogic Developer Discussion
Subject: Re: [MarkLogic Dev General] Indexing strategy for attributes when 
using xdmp:xlst-invoke


> The docs are pretty clear that the xml:lang  affects the language of 
> the *child text* of elements,

  The XML spec says <http://www.w3.org/TR/REC-xml/#sec-lang-tag>:

    The language specified by xml:lang applies to the element where it is
    specified (including the values of its attributes) -----------<><<

Yes, interesting.

However the test cases that were shown are using non-standard functions - i.e. 
the result of  indexes and other vendor specific features (not XPath or XDM or 
XQuery standards).
How ML indexes things and returns results using cts:search() and such is not 
covered by any specs except ML's.  
The same is true for all implementations of products that extend a spec.

This isn't an excuse - there is obviously inconsistent behavior shown by pre 
and post deindexing, but the test cases don't really uncover what that is 
exactly - beyond 'unexpected'.

I am not suggesting this is anyone else's responsibility - just making a 
personal observation on pre-judging exactly 'what' is broken when given a 
particular test when testing features - 
If the tests are not testing the documented behavior - it's not nearly so easy 
(for anyone) to judge if the observations are 'correct' or not.   The tests in 
this thread (to my read) *neither* test the documented ML vendor-specific 
features against ML docs, *nor* test XQuery/XML core features against the W3C 
docs.   So it's not easy for either users or developers to make an objective 
statement about whether it's 'right' or not -- and if not, what exactly isn't 
'right'.   
 
That causes debates like this to proliferate instead of getting work done :)


.NOTE: ..>> ( This is a general cross industry /  cross company/organization 
statement
   - and a personal opinion)  

I bring this up 'preemptively' to help prioritize something as a 'bug' or 
'defect' vs 'that would be nice to improve ... someday'.  If the problem 
reported doesn't conflict with product-specific feature docs or core W3C docs, 
especially if it doesn't appear to be a common use case - it's more likely to be 
considered a 'feature enhancement request' than a 'bug fix' -  and that 
(feature vs bug), whether the product is open source and written by the love of 
volunteers, or proprietary and written by paid staff - has a huge impact on if 
or when it will be considered.
If only we all had infinite clones and time and resources :)






_______________________________________________
General mailing list
[email protected]
Manage your subscription at: 
http://developer.marklogic.com/mailman/listinfo/general
_______________________________________________
General mailing list
[email protected]
Manage your subscription at: 
http://developer.marklogic.com/mailman/listinfo/general
_______________________________________________
General mailing list
[email protected]
Manage your subscription at: 
http://developer.marklogic.com/mailman/listinfo/general

[MarkLogic Dev General] Target ! cts:atribute-word-query !! Was RE: xml:lang - refinement step 1 - RE: Indexing strategy for attributes when using xdmp:xlst-invoke

Reply via email to