Hi,

I have discovered that when you use a stylesheet with xdmp:xslt-invoke
to transform your document content, in some circumstances attributes
are not indexed as you might expect.

- If within an element an attribute x appears before the xml:lang
attribute then this attribute x is indexed based on the default
language of the database.
- If within an element an attribute x appears after the xml:lang
attribute then this attribute x is indexed based on the language in
this previous xml:lang attribute.

Because the default language of the database can differ from the
language in the xml:lang attribute, values of attribute x can end up
indexed under different languages.

After reindexing the database all these attributes x are indexed
according to the xml:lang attribute that appears within the same
element.

This occurs in both MarkLogic 7 and MarkLogic 8.

Although this problem can easily be avoided, does anyone know whether
a certain option should be used within the stylesheet to avoid it? Or
might this perhaps be a bug?

An example is given below:

xquery version "1.0-ml";

(: The html prefix is declared here but is not referenced by the example
   query that follows. :)
declare namespace html = "http://www.w3.org/1999/xhtml";

(: Search API library module; provides search:search(), which the example
   uses to count matches per language-specific constraint. :)
import module namespace search = "http://marklogic.com/appservices/search"
    at "/MarkLogic/appservices/search/search.xqy";

(: Search API options shared by every search:search() call in this example.
   Defines two word constraints, "type-de" and "type-en", both on the
   un-namespaced attribute bar/@type; they differ only in the lang term
   option (de vs. en), so each constraint matches the attribute value only
   when it is indexed under that language. Searches run unfiltered. :)
declare variable $SEARCH-OPTIONS :=
    <options xmlns="http://marklogic.com/appservices/search">
        <search-option>unfiltered</search-option>
        <return-query>true</return-query>
        <return-results>true</return-results>

        <constraint name="type-de">
            <word>
                <attribute ns="" name="type"/>
                <element ns="" name="bar"/>
                <term-option>lang=de</term-option>
            </word>
        </constraint>
        <constraint name="type-en">
            <word>
                <attribute ns="" name="type"/>
                <element ns="" name="bar"/>
                <term-option>lang=en</term-option>
            </word>
        </constraint>
    </options>;

(: Reproduction script. The runs are meant to be executed one at a time:
   runs 1-4 are commented out and only run 5 is live as written.
   The two test documents differ in attribute order on bar (and in the
   value of type): content1 has type BEFORE xml:lang, content2 has type
   AFTER xml:lang. :)
let $content1 :=
<foo>
   <bar type="abc" xml:lang="de">
   </bar>
</foo>

let $content2 :=
<foo>
   <bar xml:lang="de" type="def">
   </bar>
</foo>

(: default database language is 'en' :)

(: copy-and-paste.xsl is an identity-transform stylesheet; it copies every
   attribute and node unchanged:

<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:template match="@*|node()">
        <xsl:copy>
            <xsl:apply-templates select="@*|node()" />
        </xsl:copy>
    </xsl:template>
</xsl:stylesheet>
:)

(: Run 1: I add two documents :)

(:
let $_ := xdmp:document-insert("/test/foo1",$content1)
let $_ := xdmp:document-insert("/test/foo2",$content2)
return "inserted documents 1 and 2"
:)

(: Run 2 : I check the number of documents found in each language after run 1 :)

(:
let $found-de-abc := search:search("type-de:abc", $SEARCH-OPTIONS)/@total
let $found-en-abc := search:search("type-en:abc", $SEARCH-OPTIONS)/@total
let $found-de-def := search:search("type-de:def", $SEARCH-OPTIONS)/@total
let $found-en-def := search:search("type-en:def", $SEARCH-OPTIONS)/@total
return fn:concat ("Language 'de'/'abc' : ", $found-de-abc," and
language 'en'/'abc' : ", $found-en-abc, " and language 'de'/'def' : ",
$found-de-def," and language 'en'/'def' : ", $found-en-def)
:)

(: Run 2 returns (both directly inserted documents are indexed as 'de'):
Language 'de'/'abc' : 1 and language 'en'/'abc' : 0 and language
'de'/'def' : 1 and language 'en'/'def' : 0
:)

(: Run 3 : I add two more documents based on the previous documents
using xdmp:xslt-invoke and the stylesheet :)

(:
let $content3 := xdmp:xslt-invoke("/app/xsl/copy-and-paste.xsl",
fn:doc("/test/foo1"))
let $content4 := xdmp:xslt-invoke("/app/xsl/copy-and-paste.xsl",
fn:doc("/test/foo2"))
let $_ := xdmp:document-insert("/test/foo3",$content3)
let $_ := xdmp:document-insert("/test/foo4",$content4)
return "inserted documents 3 and 4"
:)

(: Run 4 : I check the number of documents found in each language
after runs 1 and 3 :)

(:
let $found-de-abc := search:search("type-de:abc", $SEARCH-OPTIONS)/@total
let $found-en-abc := search:search("type-en:abc", $SEARCH-OPTIONS)/@total
let $found-de-def := search:search("type-de:def", $SEARCH-OPTIONS)/@total
let $found-en-def := search:search("type-en:def", $SEARCH-OPTIONS)/@total
return fn:concat ("Language 'de'/'abc' : ", $found-de-abc," and
language 'en'/'abc' : ", $found-en-abc, " and language 'de'/'def' : ",
$found-de-def," and language 'en'/'def' : ", $found-en-def)
:)

(: Run 4 returns (the unexpected result: the transformed copy of the
document whose type attribute precedes xml:lang is found under 'en'):
Language 'de'/'abc' : 1 and language 'en'/'abc' : 1 and language
'de'/'def' : 2 and language 'en'/'def' : 0
:)

(: Then I reindex the database :)

(: Run 5 : I check the number of documents found in each language
after reindex :)

(: Each count is the @total attribute of the search:search() response for
   one constraint/value pair.
   NOTE(review): the line breaks inside the fn:concat string literals look
   like e-mail line wrapping; as written they are part of the strings. :)
let $found-de-abc := search:search("type-de:abc", $SEARCH-OPTIONS)/@total
let $found-en-abc := search:search("type-en:abc", $SEARCH-OPTIONS)/@total
let $found-de-def := search:search("type-de:def", $SEARCH-OPTIONS)/@total
let $found-en-def := search:search("type-en:def", $SEARCH-OPTIONS)/@total
return fn:concat ("Language 'de'/'abc' : ", $found-de-abc," and
language 'en'/'abc' : ", $found-en-abc, " and language 'de'/'def' : ",
$found-de-def," and language 'en'/'def' : ", $found-en-def)

(: Run 5 returns (after the reindex all four documents are found under 'de'):
Language 'de'/'abc' : 2 and language 'en'/'abc' : 0 and language
'de'/'def' : 2 and language 'en'/'def' : 0
:)


Thanks,

Johan de Boer
_______________________________________________
General mailing list
[email protected]
Manage your subscription at: 
http://developer.marklogic.com/mailman/listinfo/general

Reply via email to