Hi, I have discovered that when you use a stylesheet with xdmp:xslt-invoke to transform your document content, in some circumstances attributes are not indexed as you might expect.
- If within an element an attribute x appears before the xml:lang attribute, then this attribute x is indexed based on the default language of the database. - If within an element an attribute x appears after the xml:lang attribute, then this attribute x is indexed based on the language in this preceding xml:lang attribute. Because the default language of the database can differ from the language in the xml:lang attribute, values for attribute x can end up indexed under different languages. After reindexing the database, all these attributes x are indexed according to the xml:lang attribute that appears within the same element. This occurs in both MarkLogic 7 and MarkLogic 8. Although this problem can easily be avoided, does anyone know if a certain option within the stylesheet should be used to avoid this? Or might this perhaps be a bug? An example is given below: xquery version "1.0-ml"; declare namespace html = "http://www.w3.org/1999/xhtml"; import module namespace search="http://marklogic.com/appservices/search" at "/MarkLogic/appservices/search/search.xqy"; declare variable $SEARCH-OPTIONS := <options xmlns="http://marklogic.com/appservices/search"> <search-option>unfiltered</search-option> <return-query>true</return-query> <return-results>true</return-results> <constraint name="type-de"> <word> <attribute ns="" name="type"/> <element ns="" name="bar"/> <term-option>lang=de</term-option> </word> </constraint> <constraint name="type-en"> <word> <attribute ns="" name="type"/> <element ns="" name="bar"/> <term-option>lang=en</term-option> </word> </constraint> </options>; let $content1 := <foo> <bar type="abc" xml:lang="de"> </bar> </foo> let $content2 := <foo> <bar xml:lang="de" type="def"> </bar> </foo> (: default database language is 'en' :) (: copy-and-paste.xsl is a stylesheet: <xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> <xsl:template match="@*|node()"> <xsl:copy> <xsl:apply-templates select="@*|node()" /> </xsl:copy> 
</xsl:template> </xsl:stylesheet> :) (: Run 1: I add two documents :) (: let $_ := xdmp:document-insert("/test/foo1",$content1) let $_ := xdmp:document-insert("/test/foo2",$content2) return "inserted documents 1 and 2" :) (: Run 2 : I check the number of documents found in each language after run 1 :) (: let $found-de-abc := search:search("type-de:abc", $SEARCH-OPTIONS)/@total let $found-en-abc := search:search("type-en:abc", $SEARCH-OPTIONS)/@total let $found-de-def := search:search("type-de:def", $SEARCH-OPTIONS)/@total let $found-en-def := search:search("type-en:def", $SEARCH-OPTIONS)/@total return fn:concat ("Language 'de'/'abc' : ", $found-de-abc," and language 'en'/'abc' : ", $found-en-abc, " and language 'de'/'def' : ", $found-de-def," and language 'en'/'def' : ", $found-en-def) :) (: Run 2 returns: Language 'de'/'abc' : 1 and language 'en'/'abc' : 0 and language 'de'/'def' : 1 and language 'en'/'def' : 0 :) (: Run 3 : I add two more documents based on the previous documents using xdmp:xslt-invoke and the stylesheet :) (: let $content3 := xdmp:xslt-invoke("/app/xsl/copy-and-paste.xsl", fn:doc("/test/foo1")) let $content4 := xdmp:xslt-invoke("/app/xsl/copy-and-paste.xsl", fn:doc("/test/foo2")) let $_ := xdmp:document-insert("/test/foo3",$content3) let $_ := xdmp:document-insert("/test/foo4",$content4) return "inserted documents 3 and 4" :) (: Run 4 : I check the number of documents found in each language after runs 1 and 3 :) (: let $found-de-abc := search:search("type-de:abc", $SEARCH-OPTIONS)/@total let $found-en-abc := search:search("type-en:abc", $SEARCH-OPTIONS)/@total let $found-de-def := search:search("type-de:def", $SEARCH-OPTIONS)/@total let $found-en-def := search:search("type-en:def", $SEARCH-OPTIONS)/@total return fn:concat ("Language 'de'/'abc' : ", $found-de-abc," and language 'en'/'abc' : ", $found-en-abc, " and language 'de'/'def' : ", $found-de-def," and language 'en'/'def' : ", $found-en-def) :) (: Run 4 returns: Language 'de'/'abc' : 1 and 
language 'en'/'abc' : 1 and language 'de'/'def' : 2 and language 'en'/'def' : 0 :) (: Then I reindex the database :) (: Run 5 : I check the number of documents found in each language after reindex :) let $found-de-abc := search:search("type-de:abc", $SEARCH-OPTIONS)/@total let $found-en-abc := search:search("type-en:abc", $SEARCH-OPTIONS)/@total let $found-de-def := search:search("type-de:def", $SEARCH-OPTIONS)/@total let $found-en-def := search:search("type-en:def", $SEARCH-OPTIONS)/@total return fn:concat ("Language 'de'/'abc' : ", $found-de-abc," and language 'en'/'abc' : ", $found-en-abc, " and language 'de'/'def' : ", $found-de-def," and language 'en'/'def' : ", $found-en-def) (: Run 5 returns: Language 'de'/'abc' : 2 and language 'en'/'abc' : 0 and language 'de'/'def' : 2 and language 'en'/'def' : 0 :) Thanks, Johan de Boer _______________________________________________ General mailing list [email protected] Manage your subscription at: http://developer.marklogic.com/mailman/listinfo/general
