Subramanya Sastry has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/292078

Change subject: WIP: Auto-detect interwiki links without needing data-parsoid 
info
......................................................................

WIP: Auto-detect interwiki links without needing data-parsoid info

* This breaks the test "mw:ExtLink linking to a interwiki URL can be
  round-tripped losslessly (T94723)" but not sure that we need to
  preserve that behavior. Looks like there is some discussion around
  this in T102556.

Bug: T71207
Change-Id: I50edd10c4ac931f7606bade9466678fbe301a271
---
M lib/html2wt/LinkHandler.js
M tests/parserTests-blacklist.js
2 files changed, 12 insertions(+), 4 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid 
refs/changes/78/292078/1

diff --git a/lib/html2wt/LinkHandler.js b/lib/html2wt/LinkHandler.js
index 78e0bbb..0a4e1f3 100644
--- a/lib/html2wt/LinkHandler.js
+++ b/lib/html2wt/LinkHandler.js
@@ -173,7 +173,7 @@
                                && !/^#|\?./.test(interWikiMatch[1])
                                        // ExtLinks should have content to 
convert.
                                && (rtData.type !== 'mw:ExtLink' || 
rtData.content.string || rtData.contentNode)
-                               && (dp.isIW || target.modified || 
rtData.contentModified)) {
+                               && (dp.isIW || !state.rtTestMode || 
target.modified || rtData.contentModified)) {
                                // External link that is really an interwiki 
link. Convert it.
                                if (rtData.type === 'mw:ExtLink') {
                                        rtData.type = 'mw:WikiLink';
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index db6c8d0..3a4b889 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -286,6 +286,7 @@
 
 
 // Blacklist for wt2wt
+add("wt2wt", "Parsing an URL", "[[:fr:🍺|http://fr.wikipedia.org/wiki/🍺]]\n<!-- 
EasterEgg we love beer, better be able be able to link to it -->");
 add("wt2wt", "Parsoid only: Quote balancing context should be restricted to 
td/th cells on the same wikitext line\n(Requires tidy for PHP parser output to 
be fixed up)", "{|\n!''a''!!''b''\n|''a''||''b''\n|}");
 add("wt2wt", "Non-word characters don't terminate tag names (bug 17663, 40670, 
52022)", "<blockquote|>a\n\n<b→> doesn't terminate </b→>\n\n<bä> doesn't 
terminate </bä>\n\n<boo> doesn't terminate </boo>\n\n<s.foo> doesn't terminate 
</s.foo>\n\n<sub-ID#1>\n");
 add("wt2wt", "Non-word characters don't terminate tag names + tidy", 
"<blockquote|>a\n\n<b→> doesn't terminate </b→>\n\n<bä> doesn't terminate 
</bä>\n\n<boo> doesn't terminate </boo>\n\n<s.foo> doesn't terminate 
</s.foo>\n\n<sub-ID#1>\n");
@@ -302,6 +303,7 @@
 add("wt2wt", "External links: multiple legal whitespace is fine, Magnus. Don't 
break it please. (bug 5081)", "[http://www.example.com test]\n");
 add("wt2wt", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid", 
"''[http://example.com text''<nowiki/>'']''\n[http://example.com 
'''text''']'''<nowiki/>'''\n''Something [http://example.com in 
italic''<nowiki/>'']''\n''Something [http://example.com mixed''''', even 
bold''''']'''\n'''''Now [http://example.com both'''''<nowiki/>''''']'''''\n");
 add("wt2wt", "External link containing double-single-quotes with no space 
separating the url from text in italics", 
"[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm ''La muerte de 
Casagemas'' (1901) en el sitio de ][[Museo Picasso (París)|Museo Picasso]].\n");
+add("wt2wt", "mw:ExtLink linking to a interwiki URL can be round-tripped 
losslessly (T94723)", "[[wikipedia:European_Robin|European Robin]]\n");
 add("wt2wt", "Unclosed and unmatched quotes", "'''''Bold italic text '''with 
bold deactivated''' in between.'''''\n\n'''''Bold italic text ''with italic 
deactivated'' in between.'''''\n\n'''Bold text..'''\n\n..spanning two 
paragraphs (should not work).'''<nowiki/>'''\n\n'''Bold tag left 
open'''\n\n''Italic tag left open''\n\nNormal text.\n\n<!-- Unmatching number 
of opening, closing tags: -->\n'''This year'<nowiki/>'''s election ''should'' 
beat '''last year''''s.\n\n''Tom'''s car is bigger than 
'''''<nowiki/>'''Susan'''s.\n\nPlain ''italic'''s plain\n");
 add("wt2wt", "A table with captions with non-default spaced attributes and a 
table row", "{|\n|+ style=\"color: red;\" |caption2\n|+ style=\"color: red;\" | 
caption3\n|-\n| foo\n|}");
 add("wt2wt", "Table td-cell syntax variations", "{|\n| foo bar | baz\n| foo 
bar foo || baz\n| style=\"color:red;\" | baz\n| style='color:red;' || baz\n|}");
@@ -1320,7 +1322,12 @@
 
 // Blacklist for selser
 add("selser", "Extra newlines followed by heading [1,2,1,3,0,4,0,4,1,3,4]", 
"a\n\n40z1qdji2lfskyb9\n\n\n\n=b=\n9h4x5nzjh28rggb9\n\n[[a]]\n\nkup4bo01iztzkt9\n\n\n\nzlbbwe36wkrcnmi\n");
-add("selser", "Parsing an URL [[[2]],2,0]", 
"[[:fr:🍺|blv27e9oigmpwrk9http://fr.wikipedia.org/wiki/🍺]]\n\n7vcz2sjoo733ow29\n<!--
 EasterEgg we love beer, better be able be able to link to it -->");
+add("selser", "Parsing an URL [2,0,2]", 
"bs8j6bi9cqh0k9\n\nhttp://fr.wikipedia.org/wiki/🍺\n\nk7365yy4w9418aor<!-- 
EasterEgg we love beer, better be able be able to link to it -->\n");
+add("selser", "Parsing an URL [1,0,4]", 
"http://fr.wikipedia.org/wiki/🍺\n\n45l8ib6o1shjv2t9\n");
+add("selser", "Parsing an URL [1,0,0]", "http://fr.wikipedia.org/wiki/🍺\n<!-- 
EasterEgg we love beer, better be able be able to link to it -->");
+add("selser", "Parsing an URL [0,4,3]", 
"http://fr.wikipedia.org/wiki/🍺\n\nxqtpipchumcxr\n");
+add("selser", "Parsing an URL [0,3,3]", "http://fr.wikipedia.org/wiki/🍺\n");
+add("selser", "Parsing an URL [1,0,2]", 
"http://fr.wikipedia.org/wiki/🍺\n\n2t2vp3lwj30ltyb9<!-- EasterEgg we love beer, 
better be able be able to link to it -->\n");
 add("selser", "Italics and bold: 5-quote opening sequence: (5,6) [[[2]]]", 
"''7mymhlqo5nhlg14i'''foo''''''");
 add("selser", "Parsoid only: Quote balancing context should be restricted to 
td/th cells on the same wikitext line\n(Requires tidy for PHP parser output to 
be fixed up) [1]", "{| 
data-foobar=\"pl7el6nksli8uxr\"\n!''a!!''b\n|''a||''b\n|}");
 add("selser", "Parsoid only: Quote balancing context should be restricted to 
td/th cells on the same wikitext line\n(Requires tidy for PHP parser output to 
be fixed up) [[4,[[4,0,2,[1],0],4]]]", 
"{|<!--44rbogtlx8hncdi-->\n!5oylcgfssnfav2t9!!''b\n!qcinqswgq2r9t3xr\n|''a''||''b<!--b7ncx26rmqiu23xr-->\n|}");
@@ -1457,8 +1464,9 @@
 add("selser", "External link containing double-single-quotes with no space 
separating the url from text in italics [[1,3,0]]", 
"[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm ''La muerte de 
Casagemas'' (1901) en el sitio de ]\n");
 add("selser", "External link containing double-single-quotes with no space 
separating the url from text in italics [[4,0,3]]", "s7cmk1ocalpd5cdi\n");
 add("selser", "External link containing double-single-quotes with no space 
separating the url from text in italics [[[1,2],2,4]]", 
"[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm ''La muerte de 
Casagemas''wvn1dm0gv694fgvi (1901) en el sitio de 
]bizwk64e6yfogvi3d9bhvaaeq8jv2t9");
-add("selser", "mw:ExtLink linking to a interwiki URL can be round-tripped 
losslessly (T94723) [[[4]]]", "[[wikipedia:European_Robin|w8te4gwhsniv0a4i]]");
-add("selser", "mw:ExtLink linking to a interwiki URL can be round-tripped 
losslessly (T94723) [[[2]]]", 
"[[wikipedia:European_Robin|jtvcvll5n6x0f6rEuropean Robin]]");
+add("selser", "mw:ExtLink linking to a interwiki URL can be round-tripped 
losslessly (T94723) [2]", 
"w9k5s550u1xj38fr\n\n[http://en.wikipedia.org/wiki/European_Robin European 
Robin]");
+add("selser", "mw:ExtLink linking to a interwiki URL can be round-tripped 
losslessly (T94723) [[2]]", 
"etja73fm9wak0529[http://en.wikipedia.org/wiki/European_Robin European Robin]");
+add("selser", "mw:ExtLink linking to a interwiki URL can be round-tripped 
losslessly (T94723) [1]", "[http://en.wikipedia.org/wiki/European_Robin 
European Robin]");
 add("selser", "Unclosed and unmatched quotes 
[[[0,0,4]],2,3,3,[1],0,4,0,1,2,[[2]],3,3,3,2,0,3,4,1,2,2]", "'''''Bold italic 
text '''with bold deactivated0bet2nehr3gl23xr''\n\nmveau9dj2krl766r\n\n'''Bold 
text..'''\n\ny75pk6fh55lba9k9\n\n'''Bold tag left 
open\n\nivecp7jd69h8semi\n\n''939vypv7nzalwhfrItalic tag left 
open''\n\n4jx8l4e2s9kymn29<!-- Unmatching number of opening, closing tags: 
-->\n\nftnac0njf7i1wcdi\n\n''Tom'''s car is bigger than 
'''''<nowiki/>'''Susan'''s.\n\nwv6k36cpnikbuik9\n\nvtz1rewhs7i7ldi\n\nPlain 
''italic'''s plain");
 add("selser", "Unclosed and unmatched quotes 
[[1],0,[1],0,[3],0,[2,2],0,4,0,4,2,4,0,3,0,4,0,[[0,[4]],0,3],0,4]", "'''''Bold 
italic text '''with bold deactivated''' in between.'''''\n\n'''''Bold italic 
text ''with italic deactivated'' in between.'''''\n\ny4h85lgdrtacerk9..spanning 
two paragraphs (should not 
work).3gnmqbeuuymbo6r'''\n\n3m9bvegcf03sor\n\nskw0gib981xde7b9\n\ndjjwoysazbvgqfr\n\nqcokvvv9z09kke29\n\nvmtyprts14m9rudi\n\n''Tom'''1qr9kmif90ox0f6r'''''<nowiki/>'''Susan'''\n\n5e2ez0in4um78pvi\n");
 add("selser", "Unclosed and unmatched quotes 
[[[3,0,[3]]],0,[1],0,2,2,3,0,[4],4,[4],0,0,3,0,4,[2,3,3,4,3,0],0,4,2,[0,3,4]]", 
"''with bold deactivated'''<nowiki/>'''''\n\n'''''Bold italic text ''with 
italic deactivated'' in between.'''''\n\nuimy0isfyrykfbt9\n\n'''Bold 
text..\n\nhl888wghmewxw29\n\n4yq4z49x86ywrk9\n\nomsddcwsglihehfr\n\nmumepxtpar0vbo6r\n\nNormal
 text.<!-- Unmatching number of opening, closing tags: 
-->\n\ndska41gwjbwyu8fr\n\n508yy8ogfxd8ia4i'''This 
year''''0o13pli3vxojq0k9s.\n\n12et9k2znm5wb3xr\n\nl97w0g61j6rvbo6r\n\nPlain 
iahdy1xbdbjwz5mi");

-- 
To view, visit https://gerrit.wikimedia.org/r/292078
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I50edd10c4ac931f7606bade9466678fbe301a271
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <ssas...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to