[
https://issues.apache.org/jira/browse/SOLR-12409?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Sambhav Kothari updated SOLR-12409:
-----------------------------------
Description:
Hello,
I experienced a weird behaviour with dismax and edismax query parsers.
Dismax will include pf boosts when we query something that has just a single
word; edismax, on the other hand, will not include pf boosts.
The result is that a dismax and an edismax handler with the same set of
defaults return different results for single word queries (e.g. "Hello") but
the same results for multi word queries (e.g. "Hello World").
Is this expected?
Attaching debug query logs for both below.
{code:java}
// Single word query against dismax QH
{
"rawquerystring":"pink",
"querystring":"pink",
"parsedquery":"+DisjunctionMaxQuery((area:pink | country:pink | ipi:pink |
(artist:pink)^2.0 | beginarea:pink | type:pink | (sortname:pink)^2.0 |
isni:pink | alias:pink | comment:pink | ngram:pink | tag:pink))
DisjunctionMaxQuery(((sortname:pink)^1.2 | (alias:pink)^1.2 |
(comment:pink)^1.2 | (artist:pink)^1.5))
FunctionQuery(log(sum(int(ref_count),const(1))))^3.0",
"parsedquery_toString":"+(area:pink | country:pink | ipi:pink |
(artist:pink)^2.0 | beginarea:pink | type:pink | (sortname:pink)^2.0 |
isni:pink | alias:pink | comment:pink | ngram:pink | tag:pink)
((sortname:pink)^1.2 | (alias:pink)^1.2 | (comment:pink)^1.2 |
(artist:pink)^1.5) (log(sum(int(ref_count),const(1))))^3.0"
}
// Single word query against edismax QH
{
"rawquerystring":"pink ",
"querystring":"pink ",
"parsedquery":"+DisjunctionMaxQuery((area:pink | country:pink | ipi:pink |
(artist:pink)^2.0 | beginarea:pink | type:pink | (sortname:pink)^2.0 |
isni:pink | alias:pink | comment:pink | ngram:pink | tag:pink)) ()
FunctionQuery(log(sum(int(ref_count),const(1))))^3.0",
"parsedquery_toString":"+(area:pink | country:pink | ipi:pink |
(artist:pink)^2.0 | beginarea:pink | type:pink | (sortname:pink)^2.0 |
isni:pink | alias:pink | comment:pink | ngram:pink | tag:pink) ()
(log(sum(int(ref_count),const(1))))^3.0"
}
// Multi word query against dismax QH
{
"rawquerystring":"pink floyd",
"querystring":"pink floyd",
"parsedquery":"+(DisjunctionMaxQuery((area:pink | country:pink | ipi:pink |
(artist:pink)^2.0 | beginarea:pink | type:pink | (sortname:pink)^2.0 |
isni:pink | alias:pink | comment:pink | ngram:pink | tag:pink))
DisjunctionMaxQuery((area:floyd | country:floyd | ipi:floyd |
(artist:floyd)^2.0 | beginarea:floyd | type:floyd | (sortname:floyd)^2.0 |
isni:floyd | alias:floyd | comment:floyd | ngram:floyd | tag:floyd)))~2
DisjunctionMaxQuery(((sortname:\"pink floyd\")^1.2 | (alias:\"pink floyd\")^1.2
| (comment:\"pink floyd\")^1.2 | (artist:\"pink floyd\")^1.5))
FunctionQuery(log(sum(int(ref_count),const(1))))^3.0",
"parsedquery_toString":"+(((area:pink | country:pink | ipi:pink |
(artist:pink)^2.0 | beginarea:pink | type:pink | (sortname:pink)^2.0 |
isni:pink | alias:pink | comment:pink | ngram:pink | tag:pink) (area:floyd |
country:floyd | ipi:floyd | (artist:floyd)^2.0 | beginarea:floyd | type:floyd |
(sortname:floyd)^2.0 | isni:floyd | alias:floyd | comment:floyd | ngram:floyd |
tag:floyd))~2) ((sortname:\"pink floyd\")^1.2 | (alias:\"pink floyd\")^1.2 |
(comment:\"pink floyd\")^1.2 | (artist:\"pink floyd\")^1.5)
(log(sum(int(ref_count),const(1))))^3.0"
}
// Multi word query against edismax QH
{
"rawquerystring":"pink floyd",
"querystring":"pink floyd",
"parsedquery":"+DisjunctionMaxQuery((((area:pink area:floyd)~2) | country:pink
floyd | ipi:pinkfloyd | () | ((artist:pink artist:floyd)~2)^2.0 |
((beginarea:pink beginarea:floyd)~2) | type:pink floyd | ((sortname:pink
sortname:floyd)~2)^2.0 | isni:pinkfloyd | ((alias:pink alias:floyd)~2) |
((comment:pink comment:floyd)~2) | ((ngram:pink ngram:floyd)~2) | ((tag:pink
tag:floyd)~2))) DisjunctionMaxQuery(((artist:\"pink floyd\")^1.5 |
(alias:\"pink floyd\")^1.2 | (sortname:\"pink floyd\")^1.2 | (comment:\"pink
floyd\")^1.2)) FunctionQuery(log(sum(int(ref_count),const(1))))^3.0",
"parsedquery_toString":"+(((area:pink area:floyd)~2) | country:pink floyd |
ipi:pinkfloyd | () | ((artist:pink artist:floyd)~2)^2.0 | ((beginarea:pink
beginarea:floyd)~2) | type:pink floyd | ((sortname:pink sortname:floyd)~2)^2.0
| isni:pinkfloyd | ((alias:pink alias:floyd)~2) | ((comment:pink
comment:floyd)~2) | ((ngram:pink ngram:floyd)~2) | ((tag:pink tag:floyd)~2))
((artist:\"pink floyd\")^1.5 | (alias:\"pink floyd\")^1.2 | (sortname:\"pink
floyd\")^1.2 | (comment:\"pink floyd\")^1.2)
(log(sum(int(ref_count),const(1))))^3.0"
}
{code}
was:
Hello,
I experienced a weird behaviour with dismax and edismax query parsers.
Dismax will include pf boosts when we query something that has just a single
word; edismax, on the other hand, will not include pf boosts.
The result is that a dismax and an edismax handler with the same set of
defaults return different results for single word queries (e.g. "Hello") but
the same results for multi word queries (e.g. "Hello World").
Is this expected?
Regards,
Sam
Attaching debug query logs for both below.
{code:java}
// Single word query against dismax QH
{
"rawquerystring":"pink",
"querystring":"pink",
"parsedquery":"+DisjunctionMaxQuery((area:pink | country:pink | ipi:pink |
(artist:pink)^2.0 | beginarea:pink | type:pink | (sortname:pink)^2.0 |
isni:pink | alias:pink | comment:pink | ngram:pink | tag:pink))
DisjunctionMaxQuery(((sortname:pink)^1.2 | (alias:pink)^1.2 |
(comment:pink)^1.2 | (artist:pink)^1.5))
FunctionQuery(log(sum(int(ref_count),const(1))))^3.0",
"parsedquery_toString":"+(area:pink | country:pink | ipi:pink |
(artist:pink)^2.0 | beginarea:pink | type:pink | (sortname:pink)^2.0 |
isni:pink | alias:pink | comment:pink | ngram:pink | tag:pink)
((sortname:pink)^1.2 | (alias:pink)^1.2 | (comment:pink)^1.2 |
(artist:pink)^1.5) (log(sum(int(ref_count),const(1))))^3.0"
}
// Single word query against edismax QH
{
"rawquerystring":"pink ",
"querystring":"pink ",
"parsedquery":"+DisjunctionMaxQuery((area:pink | country:pink | ipi:pink |
(artist:pink)^2.0 | beginarea:pink | type:pink | (sortname:pink)^2.0 |
isni:pink | alias:pink | comment:pink | ngram:pink | tag:pink)) ()
FunctionQuery(log(sum(int(ref_count),const(1))))^3.0",
"parsedquery_toString":"+(area:pink | country:pink | ipi:pink |
(artist:pink)^2.0 | beginarea:pink | type:pink | (sortname:pink)^2.0 |
isni:pink | alias:pink | comment:pink | ngram:pink | tag:pink) ()
(log(sum(int(ref_count),const(1))))^3.0"
}
// Multi word query against dismax QH
{
"rawquerystring":"pink floyd",
"querystring":"pink floyd",
"parsedquery":"+(DisjunctionMaxQuery((area:pink | country:pink | ipi:pink |
(artist:pink)^2.0 | beginarea:pink | type:pink | (sortname:pink)^2.0 |
isni:pink | alias:pink | comment:pink | ngram:pink | tag:pink))
DisjunctionMaxQuery((area:floyd | country:floyd | ipi:floyd |
(artist:floyd)^2.0 | beginarea:floyd | type:floyd | (sortname:floyd)^2.0 |
isni:floyd | alias:floyd | comment:floyd | ngram:floyd | tag:floyd)))~2
DisjunctionMaxQuery(((sortname:\"pink floyd\")^1.2 | (alias:\"pink floyd\")^1.2
| (comment:\"pink floyd\")^1.2 | (artist:\"pink floyd\")^1.5))
FunctionQuery(log(sum(int(ref_count),const(1))))^3.0",
"parsedquery_toString":"+(((area:pink | country:pink | ipi:pink |
(artist:pink)^2.0 | beginarea:pink | type:pink | (sortname:pink)^2.0 |
isni:pink | alias:pink | comment:pink | ngram:pink | tag:pink) (area:floyd |
country:floyd | ipi:floyd | (artist:floyd)^2.0 | beginarea:floyd | type:floyd |
(sortname:floyd)^2.0 | isni:floyd | alias:floyd | comment:floyd | ngram:floyd |
tag:floyd))~2) ((sortname:\"pink floyd\")^1.2 | (alias:\"pink floyd\")^1.2 |
(comment:\"pink floyd\")^1.2 | (artist:\"pink floyd\")^1.5)
(log(sum(int(ref_count),const(1))))^3.0"
}
// Multi word query against edismax QH
{
"rawquerystring":"pink floyd",
"querystring":"pink floyd",
"parsedquery":"+DisjunctionMaxQuery((((area:pink area:floyd)~2) | country:pink
floyd | ipi:pinkfloyd | () | ((artist:pink artist:floyd)~2)^2.0 |
((beginarea:pink beginarea:floyd)~2) | type:pink floyd | ((sortname:pink
sortname:floyd)~2)^2.0 | isni:pinkfloyd | ((alias:pink alias:floyd)~2) |
((comment:pink comment:floyd)~2) | ((ngram:pink ngram:floyd)~2) | ((tag:pink
tag:floyd)~2))) DisjunctionMaxQuery(((artist:\"pink floyd\")^1.5 |
(alias:\"pink floyd\")^1.2 | (sortname:\"pink floyd\")^1.2 | (comment:\"pink
floyd\")^1.2)) FunctionQuery(log(sum(int(ref_count),const(1))))^3.0",
"parsedquery_toString":"+(((area:pink area:floyd)~2) | country:pink floyd |
ipi:pinkfloyd | () | ((artist:pink artist:floyd)~2)^2.0 | ((beginarea:pink
beginarea:floyd)~2) | type:pink floyd | ((sortname:pink sortname:floyd)~2)^2.0
| isni:pinkfloyd | ((alias:pink alias:floyd)~2) | ((comment:pink
comment:floyd)~2) | ((ngram:pink ngram:floyd)~2) | ((tag:pink tag:floyd)~2))
((artist:\"pink floyd\")^1.5 | (alias:\"pink floyd\")^1.2 | (sortname:\"pink
floyd\")^1.2 | (comment:\"pink floyd\")^1.2)
(log(sum(int(ref_count),const(1))))^3.0"
}
{code}
> Different behaviour of pf with dismax and edismax
> -------------------------------------------------
>
> Key: SOLR-12409
> URL: https://issues.apache.org/jira/browse/SOLR-12409
> Project: Solr
> Issue Type: Bug
> Security Level: Public(Default Security Level. Issues are Public)
> Components: query parsers
> Affects Versions: 7.3.1
> Environment: Ubuntu 17.10
>
> Reporter: Sambhav Kothari
> Priority: Major
>
> Hello,
>
> I experienced a weird behaviour with dismax and edismax query parsers.
> Dismax will include pf boosts when we query something that has just a single
> word; edismax, on the other hand, will not include pf boosts.
>
> The result is that a dismax and an edismax handler with the same set of
> defaults return different results for single word queries (e.g. "Hello") but
> the same results for multi word queries (e.g. "Hello World").
>
> Is this expected?
>
> Attaching debug query logs for both below.
>
> {code:java}
> // Single word query against dismax QH
> {
> "rawquerystring":"pink",
> "querystring":"pink",
> "parsedquery":"+DisjunctionMaxQuery((area:pink | country:pink | ipi:pink |
> (artist:pink)^2.0 | beginarea:pink | type:pink | (sortname:pink)^2.0 |
> isni:pink | alias:pink | comment:pink | ngram:pink | tag:pink))
> DisjunctionMaxQuery(((sortname:pink)^1.2 | (alias:pink)^1.2 |
> (comment:pink)^1.2 | (artist:pink)^1.5))
> FunctionQuery(log(sum(int(ref_count),const(1))))^3.0",
> "parsedquery_toString":"+(area:pink | country:pink | ipi:pink |
> (artist:pink)^2.0 | beginarea:pink | type:pink | (sortname:pink)^2.0 |
> isni:pink | alias:pink | comment:pink | ngram:pink | tag:pink)
> ((sortname:pink)^1.2 | (alias:pink)^1.2 | (comment:pink)^1.2 |
> (artist:pink)^1.5) (log(sum(int(ref_count),const(1))))^3.0"
> }
> // Single word query against edismax QH
> {
> "rawquerystring":"pink ",
> "querystring":"pink ",
> "parsedquery":"+DisjunctionMaxQuery((area:pink | country:pink | ipi:pink |
> (artist:pink)^2.0 | beginarea:pink | type:pink | (sortname:pink)^2.0 |
> isni:pink | alias:pink | comment:pink | ngram:pink | tag:pink)) ()
> FunctionQuery(log(sum(int(ref_count),const(1))))^3.0",
> "parsedquery_toString":"+(area:pink | country:pink | ipi:pink |
> (artist:pink)^2.0 | beginarea:pink | type:pink | (sortname:pink)^2.0 |
> isni:pink | alias:pink | comment:pink | ngram:pink | tag:pink) ()
> (log(sum(int(ref_count),const(1))))^3.0"
> }
> // Multi word query against dismax QH
> {
> "rawquerystring":"pink floyd",
> "querystring":"pink floyd",
> "parsedquery":"+(DisjunctionMaxQuery((area:pink | country:pink | ipi:pink |
> (artist:pink)^2.0 | beginarea:pink | type:pink | (sortname:pink)^2.0 |
> isni:pink | alias:pink | comment:pink | ngram:pink | tag:pink))
> DisjunctionMaxQuery((area:floyd | country:floyd | ipi:floyd |
> (artist:floyd)^2.0 | beginarea:floyd | type:floyd | (sortname:floyd)^2.0 |
> isni:floyd | alias:floyd | comment:floyd | ngram:floyd | tag:floyd)))~2
> DisjunctionMaxQuery(((sortname:\"pink floyd\")^1.2 | (alias:\"pink
> floyd\")^1.2 | (comment:\"pink floyd\")^1.2 | (artist:\"pink floyd\")^1.5))
> FunctionQuery(log(sum(int(ref_count),const(1))))^3.0",
> "parsedquery_toString":"+(((area:pink | country:pink | ipi:pink |
> (artist:pink)^2.0 | beginarea:pink | type:pink | (sortname:pink)^2.0 |
> isni:pink | alias:pink | comment:pink | ngram:pink | tag:pink) (area:floyd |
> country:floyd | ipi:floyd | (artist:floyd)^2.0 | beginarea:floyd | type:floyd
> | (sortname:floyd)^2.0 | isni:floyd | alias:floyd | comment:floyd |
> ngram:floyd | tag:floyd))~2) ((sortname:\"pink floyd\")^1.2 | (alias:\"pink
> floyd\")^1.2 | (comment:\"pink floyd\")^1.2 | (artist:\"pink floyd\")^1.5)
> (log(sum(int(ref_count),const(1))))^3.0"
> }
> // Multi word query against edismax QH
> {
> "rawquerystring":"pink floyd",
> "querystring":"pink floyd",
> "parsedquery":"+DisjunctionMaxQuery((((area:pink area:floyd)~2) |
> country:pink floyd | ipi:pinkfloyd | () | ((artist:pink artist:floyd)~2)^2.0
> | ((beginarea:pink beginarea:floyd)~2) | type:pink floyd | ((sortname:pink
> sortname:floyd)~2)^2.0 | isni:pinkfloyd | ((alias:pink alias:floyd)~2) |
> ((comment:pink comment:floyd)~2) | ((ngram:pink ngram:floyd)~2) | ((tag:pink
> tag:floyd)~2))) DisjunctionMaxQuery(((artist:\"pink floyd\")^1.5 |
> (alias:\"pink floyd\")^1.2 | (sortname:\"pink floyd\")^1.2 | (comment:\"pink
> floyd\")^1.2)) FunctionQuery(log(sum(int(ref_count),const(1))))^3.0",
> "parsedquery_toString":"+(((area:pink area:floyd)~2) | country:pink floyd |
> ipi:pinkfloyd | () | ((artist:pink artist:floyd)~2)^2.0 | ((beginarea:pink
> beginarea:floyd)~2) | type:pink floyd | ((sortname:pink
> sortname:floyd)~2)^2.0 | isni:pinkfloyd | ((alias:pink alias:floyd)~2) |
> ((comment:pink comment:floyd)~2) | ((ngram:pink ngram:floyd)~2) | ((tag:pink
> tag:floyd)~2)) ((artist:\"pink floyd\")^1.5 | (alias:\"pink floyd\")^1.2 |
> (sortname:\"pink floyd\")^1.2 | (comment:\"pink floyd\")^1.2)
> (log(sum(int(ref_count),const(1))))^3.0"
> }
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]