Smalyshev has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/386548 )
Change subject: Make search for titles be always uppercase
......................................................................
Make search for titles be always uppercase
Fortunately, wikidata titles are uppercase.
We may need a better solution, but that may require a full
reindex.
Bug: T179045
Change-Id: I83259e34b49b18ae8d4bff0ccb8c7738c0ea0d05
(cherry picked from commit 4f91ffb6dba082aee721a402f602e85add2c0107)
---
M repo/includes/Search/Elastic/EntitySearchElastic.php
M repo/tests/phpunit/data/entitySearch/search_de-ch-en.expected
M repo/tests/phpunit/data/entitySearch/search_de-ch.expected
M repo/tests/phpunit/data/entitySearch/search_de-ch_strict.expected
M repo/tests/phpunit/data/entitySearch/search_en.expected
M repo/tests/phpunit/data/entitySearch/search_en_strict.expected
A repo/tests/phpunit/data/entitySearch/search_id.expected
A repo/tests/phpunit/data/entitySearch/search_id.query
8 files changed, 187 insertions(+), 6 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase
refs/changes/48/386548/1
diff --git a/repo/includes/Search/Elastic/EntitySearchElastic.php
b/repo/includes/Search/Elastic/EntitySearchElastic.php
index 5e94101..b80bfcc 100644
--- a/repo/includes/Search/Elastic/EntitySearchElastic.php
+++ b/repo/includes/Search/Elastic/EntitySearchElastic.php
@@ -187,7 +187,9 @@
$labelsQuery = new BoolQuery();
$labelsQuery->addFilter( $labelsFilter );
$labelsQuery->addMust( $dismax );
- $titleMatch = new Term( [ 'title.keyword' => $text ] );
+ // TODO: this is a bit hacky, better way would be to make the field case-insensitive
+ // or add new subfield which is case-insensitive
+ $titleMatch = new Term( [ 'title.keyword' => strtoupper( $text ) ] );
// Match either labels or exact match to title
$query->addShould( $labelsQuery );
diff --git a/repo/tests/phpunit/data/entitySearch/search_de-ch-en.expected
b/repo/tests/phpunit/data/entitySearch/search_de-ch-en.expected
index 48d79ef..4a28724 100644
--- a/repo/tests/phpunit/data/entitySearch/search_de-ch-en.expected
+++ b/repo/tests/phpunit/data/entitySearch/search_de-ch-en.expected
@@ -129,7 +129,7 @@
},
{
"term": {
- "title.keyword": "Wien"
+ "title.keyword": "WIEN"
}
}
],
diff --git a/repo/tests/phpunit/data/entitySearch/search_de-ch.expected
b/repo/tests/phpunit/data/entitySearch/search_de-ch.expected
index 48ef5ea..81b6ba1 100644
--- a/repo/tests/phpunit/data/entitySearch/search_de-ch.expected
+++ b/repo/tests/phpunit/data/entitySearch/search_de-ch.expected
@@ -129,7 +129,7 @@
},
{
"term": {
- "title.keyword": "Wien"
+ "title.keyword": "WIEN"
}
}
],
diff --git a/repo/tests/phpunit/data/entitySearch/search_de-ch_strict.expected
b/repo/tests/phpunit/data/entitySearch/search_de-ch_strict.expected
index bc3b388..2163aca 100644
--- a/repo/tests/phpunit/data/entitySearch/search_de-ch_strict.expected
+++ b/repo/tests/phpunit/data/entitySearch/search_de-ch_strict.expected
@@ -59,7 +59,7 @@
},
{
"term": {
- "title.keyword": "Wien"
+ "title.keyword": "WIEN"
}
}
],
diff --git a/repo/tests/phpunit/data/entitySearch/search_en.expected
b/repo/tests/phpunit/data/entitySearch/search_en.expected
index 857c6bf..e745659 100644
--- a/repo/tests/phpunit/data/entitySearch/search_en.expected
+++ b/repo/tests/phpunit/data/entitySearch/search_en.expected
@@ -69,7 +69,7 @@
},
{
"term": {
- "title.keyword": "Duck"
+ "title.keyword": "DUCK"
}
}
],
diff --git a/repo/tests/phpunit/data/entitySearch/search_en_strict.expected
b/repo/tests/phpunit/data/entitySearch/search_en_strict.expected
index c6a9848..939a7d7 100644
--- a/repo/tests/phpunit/data/entitySearch/search_en_strict.expected
+++ b/repo/tests/phpunit/data/entitySearch/search_en_strict.expected
@@ -59,7 +59,7 @@
},
{
"term": {
- "title.keyword": "Duck"
+ "title.keyword": "DUCK"
}
}
],
diff --git a/repo/tests/phpunit/data/entitySearch/search_id.expected
b/repo/tests/phpunit/data/entitySearch/search_id.expected
new file mode 100644
index 0000000..5cab8fb
--- /dev/null
+++ b/repo/tests/phpunit/data/entitySearch/search_id.expected
@@ -0,0 +1,172 @@
+{
+ "description": "wikibase_prefix search for 'q42'",
+ "params": {
+ "timeout": "20s"
+ },
+ "query": {
+ "query": {
+ "bool": {
+ "should": [
+ {
+ "bool": {
+ "filter": [
+ {
+ "match": {
+ "labels_all.prefix": "q42"
+ }
+ }
+ ],
+ "must": [
+ {
+ "dis_max": {
+ "tie_breaker": 0,
+ "queries": [
+ {
+ "constant_score": {
+ "filter": {
+ "match": {
+ "labels.en.near_match": "q42"
+ }
+ },
+ "boost": 2
+ }
+ },
+ {
+ "constant_score": {
+ "filter": {
+ "match": {
+ "labels.en.near_match_folded": "q42"
+ }
+ },
+ "boost": 1.8
+ }
+ },
+ {
+ "constant_score": {
+ "filter": {
+ "match": {
+ "labels.en.prefix": "q42"
+ }
+ },
+ "boost": 1.1
+ }
+ },
+ {
+ "constant_score": {
+ "filter": {
+ "match": {
+ "labels_all.near_match_folded": "q42"
+ }
+ },
+ "boost": 0.001
+ }
+ }
+ ]
+ }
+ }
+ ]
+ }
+ },
+ {
+ "term": {
+ "title.keyword": "Q42"
+ }
+ }
+ ],
+ "minimum_should_match": 1,
+ "filter": [
+ {
+ "term": {
+ "content_model": "wikibase-item"
+ }
+ }
+ ]
+ }
+ },
+ "_source": [
+ "namespace",
+ "title",
+ "labels.en",
+ "descriptions.en"
+ ],
+ "stored_fields": [],
+ "highlight": {
+ "pre_tags": [
+ ""
+ ],
+ "post_tags": [
+ ""
+ ],
+ "fields": {
+ "title": {
+ "type": "experimental",
+ "fragmenter": "none",
+ "number_of_fragments": 0,
+ "matched_fields": [
+ "title.keyword"
+ ]
+ },
+ "labels.en.prefix": {
+ "type": "experimental",
+ "fragmenter": "none",
+ "number_of_fragments": 0,
+ "options": {
+ "skip_if_last_matched": true,
+ "return_snippets_and_offsets": true
+ }
+ },
+ "labels.*.prefix": {
+ "type": "experimental",
+ "fragmenter": "none",
+ "number_of_fragments": 0,
+ "options": {
+ "skip_if_last_matched": true,
+ "return_snippets_and_offsets": true
+ }
+ }
+ }
+ },
+ "size": 10,
+ "rescore": [
+ {
+ "window_size": 8192,
+ "query": {
+ "query_weight": 1,
+ "rescore_query_weight": 1,
+ "score_mode": "total",
+ "rescore_query": {
+ "function_score": {
+ "score_mode": "sum",
+ "functions": [
+ {
+ "script_score": {
+ "script": {
+ "inline": "pow(doc['incoming_links'].value , 2) \/ ( pow(doc['incoming_links'].value, 2) + pow(50,2))",
+ "lang": "expression"
+ }
+ },
+ "weight": 0.6
+ },
+ {
+ "script_score": {
+ "script": {
+ "inline": "pow(doc['sitelink_count'].value , 2) \/ ( pow(doc['sitelink_count'].value, 2) + pow(20,2))",
+ "lang": "expression"
+ }
+ },
+ "weight": 0.4
+ }
+ ]
+ }
+ }
+ }
+ }
+ ],
+ "stats": [
+ "wikibase-prefix"
+ ]
+ },
+ "options": {
+ "timeout": "20s"
+ }
+}
\ No newline at end of file
diff --git a/repo/tests/phpunit/data/entitySearch/search_id.query
b/repo/tests/phpunit/data/entitySearch/search_id.query
new file mode 100644
index 0000000..b9dac8b
--- /dev/null
+++ b/repo/tests/phpunit/data/entitySearch/search_id.query
@@ -0,0 +1,7 @@
+{
+ "search": "q42",
+ "language": "en",
+ "userLang": "en",
+ "type": "item",
+ "strictlanguage": false
+}
--
To view, visit https://gerrit.wikimedia.org/r/386548
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I83259e34b49b18ae8d4bff0ccb8c7738c0ea0d05
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: wmf/1.31.0-wmf.5
Gerrit-Owner: Smalyshev <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits