Damon,

Whether the document counts are "correct" I don't know. I suppose they are, but 
they are not what I was expecting. Here's the situation in detail:

When I insert two documents into a new, empty test database, like so...

xquery version "1.0-ml";

(: Builds two sample METS documents and inserts them at /doc1.xml and
   /doc2.xml. Each document holds three METS:dmdSec sections: one for the
   journal issue as a whole (dc:type "Journal") and one per article
   (dc:type "Article").

   The namespace declarations carry no trailing ';' after the URI, and each
   dc:source value sits on a single line so that no line break ends up
   inside the stored string value. :)
let $doc1 :=
<METS:mets xmlns:METS="http://www.loc.gov/METS/"
           xmlns:dc="http://purl.org/dc/elements/1.1/">
    <!-- metadata section follows -->
    <METS:metsHdr>
      <!-- ... -->
    </METS:metsHdr>
    <!-- data for journal as a whole -->
    <METS:dmdSec ID="dmd001">
        <METS:mdWrap MIMETYPE="text/xml">
            <METS:xmlData>
                <dc:title>The Princeton Seminary Bulletin 28:3</dc:title>
                <dc:date>2007</dc:date>
                <dc:type>Journal</dc:type>
                <dc:source>The Princeton Seminary Bulletin</dc:source>
            </METS:xmlData>
        </METS:mdWrap>
    </METS:dmdSec>
    <!-- data for first article follows -->
    <METS:dmdSec ID="dmd002">
        <METS:mdWrap MIMETYPE="text/xml">
            <METS:xmlData>
                <dc:title>Some Thoughts on Doing Theology in Public</dc:title>
                <dc:creator>John R. Bowlin</dc:creator>
                <dc:date>2007</dc:date>
                <dc:type>Article</dc:type>
                <dc:source>The Princeton Seminary Bulletin, v. 28, no. 3 (Nov. 2007)</dc:source>
                <dc:coverage>235-243</dc:coverage>
            </METS:xmlData>
        </METS:mdWrap>
    </METS:dmdSec>
    <!-- data for second article follows -->
    <METS:dmdSec ID="dmd003">
        <METS:mdWrap MIMETYPE="text/xml">
            <METS:xmlData>
                <dc:title>Death Threat: I Corinthians 11:17-34a</dc:title>
                <dc:creator>Luke A. Powery</dc:creator>
                <dc:date>2007</dc:date>
                <dc:type>Article</dc:type>
                <dc:source>The Princeton Seminary Bulletin, v. 28, no. 3 (Nov. 2007)</dc:source>
                <dc:coverage>244-250</dc:coverage>
            </METS:xmlData>
        </METS:mdWrap>
    </METS:dmdSec>
</METS:mets>

let $doc2 :=
<METS:mets xmlns:METS="http://www.loc.gov/METS/"
           xmlns:dc="http://purl.org/dc/elements/1.1/">
  <!-- metadata section follows -->
  <METS:metsHdr>
    <!-- ... -->
  </METS:metsHdr>
  <!-- data for journal as a whole -->
  <METS:dmdSec ID="dmd001">
    <METS:mdWrap MIMETYPE="text/xml">
      <METS:xmlData>
        <dc:title>The Princeton Seminary Bulletin 28:2</dc:title>
        <dc:date>2007</dc:date>
        <dc:type>Journal</dc:type>
        <dc:source>The Princeton Seminary Bulletin</dc:source>
      </METS:xmlData>
    </METS:mdWrap>
  </METS:dmdSec>
  <!-- data for first article follows -->
  <METS:dmdSec ID="dmd002">
    <METS:mdWrap MIMETYPE="text/xml">
      <METS:xmlData>
        <dc:title>The Unexpected Future</dc:title>
        <dc:creator>Iain R. Torrance</dc:creator>
        <dc:date>2007</dc:date>
        <dc:type>Article</dc:type>
        <dc:source>The Princeton Seminary Bulletin 28:2 (2007)</dc:source>
        <dc:coverage>119-122</dc:coverage>
      </METS:xmlData>
    </METS:mdWrap>
  </METS:dmdSec>
  <!-- data for second article follows -->
  <METS:dmdSec ID="dmd003">
    <METS:mdWrap MIMETYPE="text/xml">
      <METS:xmlData>
        <dc:title>The Excellence of Ministry</dc:title>
        <dc:creator>Daniel L. Migliore</dc:creator>
        <dc:date>2007</dc:date>
        <dc:type>Article</dc:type>
        <dc:source>The Princeton Seminary Bulletin 28:2 (2007)</dc:source>
        <dc:coverage>123-128</dc:coverage>
      </METS:xmlData>
    </METS:mdWrap>
  </METS:dmdSec>
</METS:mets>

return
  (xdmp:document-insert("/doc1.xml", $doc1),
   xdmp:document-insert("/doc2.xml", $doc2))

... and then run the following query against that database...

xquery version "1.0-ml";
(: The module import is a single statement: namespace URI, then the "at"
   location hint, then one terminating ';'. :)
import module namespace search = "http://marklogic.com/appservices/search"
  at "/MarkLogic/appservices/search/search.xqy";

(: Search options:
   - searchable-expression limits the returned results to individual
     METS dmdSec elements rather than whole documents;
   - "date-bucket" is a bucketed xs:gYear range constraint on dc:date;
   - "type" and "source" are xs:string range constraints on dc:type and
     dc:source, listed by descending frequency.
   The query text is empty, so nothing restricts the search beyond the
   searchable expression. :)
let $options :=
  <options xmlns="http://marklogic.com/appservices/search">
    <searchable-expression xmlns:mets="http://www.loc.gov/METS/">
      /mets:mets/mets:dmdSec
    </searchable-expression>
    <constraint name="date-bucket" xmlns="http://marklogic.com/appservices/search">
      <range type="xs:gYear">
        <element ns="http://purl.org/dc/elements/1.1/" name="date"/>
        <bucket name="any-1800" lt="1801">–1800</bucket>
        <bucket name="1801-1810" ge="1801" lt="1811">1801–1810</bucket>
        <bucket name="1811-1820" ge="1811" lt="1821">1811–1820</bucket>
        <bucket name="1821-1830" ge="1821" lt="1831">1821–1830</bucket>
        <bucket name="1831-1840" ge="1831" lt="1841">1831–1840</bucket>
        <bucket name="1841-1850" ge="1841" lt="1851">1841–1850</bucket>
        <bucket name="1851-1860" ge="1851" lt="1861">1851–1860</bucket>
        <bucket name="1861-1870" ge="1861" lt="1871">1861–1870</bucket>
        <bucket name="1871-1880" ge="1871" lt="1881">1871–1880</bucket>
        <bucket name="1881-1890" ge="1881" lt="1891">1881–1890</bucket>
        <bucket name="1891-1900" ge="1891" lt="1901">1891–1900</bucket>
        <bucket name="1901-1910" ge="1901" lt="1911">1901–1910</bucket>
        <bucket name="1911-1920" ge="1911" lt="1921">1911–1920</bucket>
        <bucket name="1921-1930" ge="1921" lt="1931">1921–1930</bucket>
        <bucket name="1931-1940" ge="1931" lt="1941">1931–1940</bucket>
        <bucket name="1941-1950" ge="1941" lt="1951">1941–1950</bucket>
        <bucket name="1951-1960" ge="1951" lt="1961">1951–1960</bucket>
        <bucket name="1961-1970" ge="1961" lt="1971">1961–1970</bucket>
        <bucket name="1971-1980" ge="1971" lt="1981">1971–1980</bucket>
        <bucket name="1981-1990" ge="1981" lt="1991">1981–1990</bucket>
        <bucket name="1991-2000" ge="1991" lt="2001">1991–2000</bucket>
        <bucket name="2001-any" ge="2001">2001–</bucket>
      </range>
    </constraint>
    <constraint name="type" xmlns="http://marklogic.com/appservices/search">
      <range type="xs:string">
        <element ns="http://purl.org/dc/elements/1.1/" name="type"/>
        <facet-option>frequency-order</facet-option>
        <facet-option>descending</facet-option>
      </range>
    </constraint>
    <constraint name="source" xmlns="http://marklogic.com/appservices/search">
      <range type="xs:string">
        <element ns="http://purl.org/dc/elements/1.1/" name="source"/>
        <facet-option>frequency-order</facet-option>
        <facet-option>descending</facet-option>
      </range>
    </constraint>
  </options>
return search:search("", $options)

... I get this response:

<search:response total="2" start="1" page-length="10" 
xmlns:search="http://marklogic.com/appservices/search">
  <search:result index="1" uri="/doc2.xml" 
path="fn:doc(&quot;/doc2.xml&quot;)/*:mets/*:dmdSec[1]" score="0" 
confidence="0" fitness="0">
    <search:snippet>
      <search:match path="fn:doc(&quot;/doc2.xml&quot;)/*:mets/*:dmdSec[1]">The 
Princeton Seminary Bulletin 28:2 2007 Journal The Princeton Seminary 
Bulletin</search:match>
    </search:snippet>
  </search:result>
  <search:result index="2" uri="/doc2.xml" 
path="fn:doc(&quot;/doc2.xml&quot;)/*:mets/*:dmdSec[2]" score="0" 
confidence="0" fitness="0">
    <search:snippet>
      <search:match path="fn:doc(&quot;/doc2.xml&quot;)/*:mets/*:dmdSec[2]">The 
Unexpected Future Iain R. Torrance 2007 Article The Princeton Seminary Bulletin 
28:2 (2007) 119-122</search:match>
    </search:snippet>
  </search:result>
  <search:result index="3" uri="/doc2.xml" 
path="fn:doc(&quot;/doc2.xml&quot;)/*:mets/*:dmdSec[3]" score="0" 
confidence="0" fitness="0">
    <search:snippet>
      <search:match path="fn:doc(&quot;/doc2.xml&quot;)/*:mets/*:dmdSec[3]">The 
Excellence of Ministry Daniel L. Migliore 2007 Article The Princeton Seminary 
Bulletin 28:2 (2007) 123-128</search:match>
    </search:snippet>
  </search:result>
  <search:result index="4" uri="/doc1.xml" 
path="fn:doc(&quot;/doc1.xml&quot;)/*:mets/*:dmdSec[1]" score="0" 
confidence="0" fitness="0">
    <search:snippet>
      <search:match path="fn:doc(&quot;/doc1.xml&quot;)/*:mets/*:dmdSec[1]">The 
Princeton Seminary Bulletin 28:3 2007 Journal The Princeton Seminary 
Bulletin</search:match>
    </search:snippet>
  </search:result>
  <search:result index="5" uri="/doc1.xml" 
path="fn:doc(&quot;/doc1.xml&quot;)/*:mets/*:dmdSec[2]" score="0" 
confidence="0" fitness="0">
    <search:snippet>
      <search:match 
path="fn:doc(&quot;/doc1.xml&quot;)/*:mets/*:dmdSec[2]">Some Thoughts on Doing 
Theology in Public John R. Bowlin 2007 Article The Princeton Seminary Bulletin, 
v. 28, no. 3 (Nov. 2007) 235-243</search:match>
    </search:snippet>
  </search:result>
  <search:result index="6" uri="/doc1.xml" 
path="fn:doc(&quot;/doc1.xml&quot;)/*:mets/*:dmdSec[3]" score="0" 
confidence="0" fitness="0">
    <search:snippet>
      <search:match 
path="fn:doc(&quot;/doc1.xml&quot;)/*:mets/*:dmdSec[3]">Death Threat: I 
Corinthians 11:17-34a Luke A. Powery 2007 Article The Princeton Seminary 
Bulletin, v. 28, no. 3 (Nov. 2007) 244-250</search:match>
    </search:snippet>
  </search:result>
  <search:facet name="date-bucket">
    <search:facet-value name="2001-any" count="2">2001–</search:facet-value>
  </search:facet>
  <search:facet name="type">
    <search:facet-value name="Article" count="2">Article</search:facet-value>
    <search:facet-value name="Journal" count="2">Journal</search:facet-value>
  </search:facet>
  <search:facet name="source">
    <search:facet-value name="The Princeton Seminary Bulletin" count="2">The 
Princeton Seminary Bulletin</search:facet-value>
    <search:facet-value name="The Princeton Seminary Bulletin 28:2 (2007)" 
count="1">The Princeton Seminary Bulletin 28:2 (2007)</search:facet-value>
    <search:facet-value name="The Princeton Seminary Bulletin, v. 28, no. 3 
(Nov. 2007)" count="1">The Princeton Seminary Bulletin, v. 28, no. 3 (Nov. 
2007)</search:facet-value>
  </search:facet>
  <search:qtext/>
  <search:metrics>
    <search:query-resolution-time>PT0.016S</search:query-resolution-time>
    <search:facet-resolution-time>PT0.016S</search:facet-resolution-time>
    <search:snippet-resolution-time>PT0S</search:snippet-resolution-time>
    <search:total-time>PT0.032S</search:total-time>
  </search:metrics>
</search:response>

To me, there are two things that are unexpected about this response. (1) The 
@total is 2, which is the number of documents, not the number of search 
results, which is 6. (2) The facet counts have the same problem: they 
correspond to the number of documents, not the search results.

Similarly, if I run the same query except changing the qtext so it's not 
empty...

return search:search("future", $options)

... I get this response:

<search:response total="1" start="1" page-length="10" 
xmlns:search="http://marklogic.com/appservices/search">
  <search:result index="1" uri="/doc2.xml" 
path="fn:doc(&quot;/doc2.xml&quot;)/*:mets/*:dmdSec[2]" score="104" 
confidence="0.669882" fitness="0.669882">
    <search:snippet>
      <search:match 
path="fn:doc(&quot;/doc2.xml&quot;)/*:mets/*:dmdSec[2]/*:mdWrap/*:xmlData/*:title">The
 Unexpected <search:highlight>Future</search:highlight></search:match>
    </search:snippet>
  </search:result>
  <search:facet name="date-bucket">
    <search:facet-value name="2001-any" count="1">2001–</search:facet-value>
  </search:facet>
  <search:facet name="type">
    <search:facet-value name="Article" count="1">Article</search:facet-value>
    <search:facet-value name="Journal" count="1">Journal</search:facet-value>
  </search:facet>
  <search:facet name="source">
    <search:facet-value name="The Princeton Seminary Bulletin" count="1">The 
Princeton Seminary Bulletin</search:facet-value>
    <search:facet-value name="The Princeton Seminary Bulletin 28:2 (2007)" 
count="1">The Princeton Seminary Bulletin 28:2 (2007)</search:facet-value>
  </search:facet>
  <search:qtext>future</search:qtext>
  <search:metrics>
    <search:query-resolution-time>PT0.015S</search:query-resolution-time>
    <search:facet-resolution-time>PT0.031S</search:facet-resolution-time>
    <search:snippet-resolution-time>PT0S</search:snippet-resolution-time>
    <search:total-time>PT0.046S</search:total-time>
  </search:metrics>
</search:response>

There is only one search result, so I would expect each facet to contain only 
one <search:facet-value>, but again, the facets are actually based on the 
entire document that the search result came from.

Many thanks,
Greg


On Nov 10, 2011, at 8:53 AM, Damon Feldman wrote:

> Greg,
> 
> Are the overall document counts correct? The total count comes from 
> cts:remainder() or xdmp:estimate() under the covers, which are index-only 
> operations like facet counts. It might help if you post a small sample of the 
> form
> 
>  xdmp:document-insert(uri1, doc1), xdmp:document-insert(uri2, doc2)
>  ; (: transaction separator :)
> 
>  let $options := ...
>  return search:search(...)
> 
> that shows the wrong count so we understand the type of searchable expression 
> and facets you are having trouble with.
> 
> Yours,
> Damon
> 
> -----Original Message-----
> From: [email protected] 
> [mailto:[email protected]] On Behalf Of Murray, Gregory
> Sent: Thursday, November 10, 2011 8:45 AM
> To: General MarkLogic Developer Discussion
> Subject: Re: [MarkLogic Dev General] How to get different facet counts for 
> different searchable-expression in Search API
> 
> I should have mentioned that I'm using 4.2-1
> 
> Any suggestions greatly appreciated.
> 
> Thanks,
> Greg
> 
> On Nov 9, 2011, at 5:21 PM, Murray, Gregory wrote:
> 
>> I'm having a similar problem with facet counts when using 
>> <searchable-expression>. After reading this thread, I'm afraid I still don't 
>> understand how to circumvent the problem. When using 
>> <searchable-expression>, it appears that the search results are constrained 
>> to that expression whereas the facet counts are not. Is there a 
>> facet-related option to similarly constrain a facet to an XPath expression? 
>> I've seen references to the "fragment-frequency" option, but it appears to 
>> have no effect in this context.
>> 
>> Many thanks,
>> Greg
>> 
>> Gregory Murray
>> Digital Library Application Developer
>> Princeton Theological Seminary
>> 
>> 
>> On Oct 18, 2011, at 8:30 PM, Michael Blakeley wrote:
>> 
>>> Will, if I can jump in.... I think your idea of using different QNames is 
>>> the right way to look at it.
>>> 
>>> Facets are built from range indexes, and range indexes contain lists of 
>>> values and fragment ids for a given QName. So if the query matches the 
>>> fragment, the facet will show all the values in that fragment. In your case 
>>> the fragment is the entire document, so you will see all the values in the 
>>> matching documents, whether they occur under /doc or under /doc//cite. Now, 
>>> you *could* create a fragment root on 'cite', but I think that would be 
>>> counter-productive. It's better to use different QNames and have different 
>>> range indexes.
>>> 
>>> So I think what you'd want to do is simply arrange for a different set of 
>>> search options for doc vs cite, including both searchable expression and 
>>> constraints. Testing for that could be as simple as a call to 
>>> cts:contains($user-search, 'select:cite') before you call search:search(). 
>>> Or if that might generate false positives, you could search:parse the user 
>>> query and then look at the cts:query XML to see whether or not the parser 
>>> found a select:cite term. If it did, then you can switch to the correct 
>>> options before calling search:resolve.
>>> 
>>> -- Mike
>>> 
>>> On 18 Oct 2011, at 17:14 , Will Thompson wrote:
>>> 
>>>> Micah,
>>>> 
>>>> I think I may have explained poorly. This is essentially what I'm doing -- 
>>>> Docs are, generally, like this:
>>>> 
>>>> <doc>
>>>> <search-meta/>
>>>> <p>...<cite><search-meta/></cite>...</p>
>>>> <section>
>>>> <p>...<cite><search-meta/></cite>...</p>
>>>> ...
>>>> </section>
>>>> </doc>
>>>> 
>>>> Searches operate over //doc by default, but if you add the operator/state 
>>>> "select:cite" it changes the searchable expression to //cite. The results 
>>>> are correct, but the problem is that the facet counts appear to be for 
>>>> *both* doc and cite metadata, and thus do not change when toggling 
>>>> searchable-expressions via operator/state.
>>>> 
>>>> This won't make any sense to our users, who will expect the facet counts 
>>>> to match what they think they're searching for.
>>>> 
>>>> -W
>>>> 
>>>> 
>>>> -----Original Message-----
>>>> From: [email protected] 
>>>> [mailto:[email protected]] On Behalf Of Micah Dubinko
>>>> Sent: Tuesday, October 18, 2011 6:56 PM
>>>> To: General MarkLogic Developer Discussion
>>>> Subject: Re: [MarkLogic Dev General] How to get different facet counts for 
>>>> different searchable-expression in Search API
>>>> 
>>>> Hi Will,
>>>> 
>>>> Everything you want to search exists in document fragments (not 
>>>> properties) right?
>>>> 
>>>> What would happen if you switched in a different searchable-expression via 
>>>> operator and state? The combined query is taken into account by faceting, 
>>>> but the searchable-expression is not.
>>>> 
>>>> -m
>>>> 
>>>> 
>>>> On Oct 18, 2011, at 4:42 PM, Will Thompson wrote:
>>>> 
>>>>> Our app has typically searched only document-type elements, but I 
>>>>> recently added metadata to citation elements (contained within and 
>>>>> scattered about document elements) so that they can be optionally 
>>>>> searched using an operator. i.e.: "term1 term2 select:citations" The 
>>>>> operator changes the searchable-expression and transform-results to 
>>>>> search only citation elements and return citation-specific snippets.
>>>>> 
>>>>> However, I need the facet counts to reflect the search being performed - 
>>>>> i.e.: only show estimates for document element direct-child metadata 
>>>>> during normal search, and only for citations when that is toggled using 
>>>>> the operator. 
>>>>> 
>>>>> My first thought was to use different names or namespace for the citation 
>>>>> metadata and have the operator toggle a separate set of constraints 
>>>>> associated with those names. But constraints are not supported children 
>>>>> of search:state under search:operator.
>>>>> 
>>>>> Any ideas on how to accomplish this with Search API? 
>>>>> 
>>>>> Thanks!
>>>>> 
>>>>> -Will
>>>>> 
>>>>> _______________________________________________
>>>>> General mailing list
>>>>> [email protected]
>>>>> http://developer.marklogic.com/mailman/listinfo/general
>>>> 
>>>> _______________________________________________
>>>> General mailing list
>>>> [email protected]
>>>> http://developer.marklogic.com/mailman/listinfo/general
>>>> _______________________________________________
>>>> General mailing list
>>>> [email protected]
>>>> http://developer.marklogic.com/mailman/listinfo/general
>>>> 
>>> 
>>> _______________________________________________
>>> General mailing list
>>> [email protected]
>>> http://developer.marklogic.com/mailman/listinfo/general
>> 
>> _______________________________________________
>> General mailing list
>> [email protected]
>> http://developer.marklogic.com/mailman/listinfo/general
> 
> _______________________________________________
> General mailing list
> [email protected]
> http://developer.marklogic.com/mailman/listinfo/general
> _______________________________________________
> General mailing list
> [email protected]
> http://developer.marklogic.com/mailman/listinfo/general

_______________________________________________
General mailing list
[email protected]
http://developer.marklogic.com/mailman/listinfo/general

Reply via email to