Subramanya Sastry has uploaded a new change for review.
https://gerrit.wikimedia.org/r/61038
Change subject: Tests update: parserTests sha, blacklist, local copy of html2wt
tests
......................................................................
Tests update: parserTests sha, blacklist, local copy of html2wt tests
* We have additional test failures:
- Some tests in parserTests.txt changed from 'disabled' to 'parsoid';
these were already failing as Parsoid tests before parserTests.js
started respecting the 'disabled' flag.
* A couple of html2wt-centric tests fail in wt2html and html2html
modes after new test snippets were added there.
* Several new html2wt tests now pass after parserTests.txt was updated
to reflect latest Parsoid output.
Change-Id: I7ac845caf98d9d125cb435c721dfc988fca52a25
---
M js/tests/fetch-parserTests.txt.js
M js/tests/parserTests-blacklist.js
M js/tests/wt_escape.tests.txt
3 files changed, 121 insertions(+), 49 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid
refs/changes/38/61038/1
diff --git a/js/tests/fetch-parserTests.txt.js
b/js/tests/fetch-parserTests.txt.js
index f9152a0..56fb107 100755
--- a/js/tests/fetch-parserTests.txt.js
+++ b/js/tests/fetch-parserTests.txt.js
@@ -10,9 +10,9 @@
// and update these hashes automatically.
//
// You can use 'sha1sum -b tests/parser/parserTests.txt' to compute this value:
-var expectedSHA1 = "19f1c3446841b04b26ede0db128b892bba969027";
+var expectedSHA1 = "409dfd42a9714b256f209565ac71c4059d4515b3";
// git log --pretty=oneline -1 tests/parser/parserTests.txt
-var latestCommit = "bd9f08424fac898601c1fca0402c4d8b98b45563";
+var latestCommit = "d1869088d2c4903dfa8b2c62ed0c18f4a40ea8b9";
var fs = require('fs'),
path = require('path'),
diff --git a/js/tests/parserTests-blacklist.js
b/js/tests/parserTests-blacklist.js
index 07eb44d..4c6e1eb 100644
--- a/js/tests/parserTests-blacklist.js
+++ b/js/tests/parserTests-blacklist.js
@@ -71,7 +71,7 @@
add("wt2html", "Interwiki link with fragment (bug 2130)");
add("wt2html", "Handling html with a div self-closing tag");
add("wt2html", "2. Lists with start-of-line-transparent tokens before bullets:
Template close");
-add("wt2html", "Unclosed formatting tags that straddle lists are closed and
reopened\n(Disabled since php parser generates broken html -- relies on Tidy to
fix up)");
+add("wt2html", "Unclosed formatting tags that straddle lists are closed and
reopened\n(Parsoid-only since php parser generates broken html -- relies on
Tidy to fix up)");
add("wt2html", "List interrupted by empty line or heading");
add("wt2html", "Single-comment whitespace lines dont break lists, but
multi-comment whitespace lines do");
add("wt2html", "Magic Word: {{FULLPAGENAME}}");
@@ -466,13 +466,12 @@
add("wt2html", "Gallery with invalid title as link (bug 43964)");
add("wt2html", "Language parser function");
add("wt2html", "1. SOL-sensitive wikitext tokens as template-args");
+add("wt2html", "Headings: 0. Unnested");
add("wt2html", "Headings: 2. Outside heading nest on a single line
<h1>foo</h1>*bar");
-add("wt2html", "Headings: 4. No escaping needed (testing just h1 and h2)");
-add("wt2html", "HRs: 1. Single line");
+add("wt2html", "Headings: 6. Heading chars in SOL context");
add("wt2html", "Tables: 1b. No escaping needed");
add("wt2html", "Tables: 1c. No escaping needed");
add("wt2html", "Tables: 1d. No escaping needed");
-add("wt2html", "Tables: 3c. Nested in th -- no escaping needed");
add("wt2html", "Links 1. Quote marks in link text");
add("wt2html", "Links 2. WikiLinks: Escapes needed");
add("wt2html", "Links 3. WikiLinks: No escapes needed");
@@ -1062,7 +1061,9 @@
add("html2html", "Gallery override link with absolute external link (bug
34852)");
add("html2html", "Gallery override link with malicious javascript (bug
34852)");
add("html2html", "Gallery with invalid title as link (bug 43964)");
+add("html2html", "Headings: 0. Unnested");
add("html2html", "Headings: 2. Outside heading nest on a single line
<h1>foo</h1>*bar");
+add("html2html", "Headings: 6. Heading chars in SOL context");
add("html2html", "HRs: 1. Single line");
add("html2html", "Links 1. Quote marks in link text");
add("html2html", "Links 2. WikiLinks: Escapes needed");
@@ -1322,9 +1323,9 @@
add("html2wt", "Nested lists 6 (both elements empty)");
add("html2wt", "1. Lists with start-of-line-transparent tokens before bullets:
Comments");
add("html2wt", "2. Lists with start-of-line-transparent tokens before bullets:
Template close");
-add("html2wt", "Unbalanced closing block tags break a list\n(Disabled since
php parser generates broken html -- relies on Tidy to fix up)");
-add("html2wt", "Unbalanced closing non-block tags don't break a
list\n(Disabled since php parser generates broken html -- relies on Tidy to fix
up)");
-add("html2wt", "Unclosed formatting tags that straddle lists are closed and
reopened\n(Disabled since php parser generates broken html -- relies on Tidy to
fix up)");
+add("html2wt", "Unbalanced closing block tags break a list\n(Parsoid-only
since php parser generates broken html -- relies on Tidy to fix up)");
+add("html2wt", "Unbalanced closing non-block tags don't break a
list\n(Parsoid-only since php parser generates broken html -- relies on Tidy to
fix up)");
+add("html2wt", "Unclosed formatting tags that straddle lists are closed and
reopened\n(Parsoid-only since php parser generates broken html -- relies on
Tidy to fix up)");
add("html2wt", "List embedded in a non-block tag\n(Ugly Parsoid output --
worth fixing; Disabled for PHP parser since it relies on Tidy)");
add("html2wt", "List items are not parsed correctly following a <pre> block
(bug 785)");
add("html2wt", "List items from template");
@@ -1926,19 +1927,12 @@
add("html2wt", "Bug 34939 - Case insensitive link parsing ([HttP://])");
add("html2wt", "Bug 34939 - Case insensitive link parsing (HttP://)");
add("html2wt", "1. SOL-sensitive wikitext tokens as template-args");
-add("html2wt", "Headings: 0. Unnested");
-add("html2wt", "Headings: 3. Nested inside html with wikitext split by html
tags");
-add("html2wt", "Headings: 4. No escaping needed (testing just h1 and h2)");
-add("html2wt", "Headings: 5. Empty headings");
-add("html2wt", "Lists: 5. No unnecessary escapes");
add("html2wt", "HRs: 1. Single line");
add("html2wt", "Tables: 1b. No escaping needed");
add("html2wt", "Tables: 1c. No escaping needed");
add("html2wt", "Tables: 1d. No escaping needed");
-add("html2wt", "Tables: 3c. Nested in th -- no escaping needed");
add("html2wt", "Tables: 4d. No escaping needed");
add("html2wt", "Links 2. WikiLinks: Escapes needed");
-add("html2wt", "1. No unnecessary escapes");
add("html2wt", "HTML tag with 'unnecessary' entity encoding in attributes");
add("html2wt", "HTML tag with broken attribute value quoting");
add("html2wt", "Parsoid-only: HTML tag with broken attribute value quoting");
diff --git a/js/tests/wt_escape.tests.txt b/js/tests/wt_escape.tests.txt
index 2eba201..4bde2bc 100644
--- a/js/tests/wt_escape.tests.txt
+++ b/js/tests/wt_escape.tests.txt
@@ -1,3 +1,13 @@
+#### The following section of tests are primarily to test
+#### wikitext escaping capabilities of Parsoid. Given that
+#### escaping can be done any number of ways, the wikitext (input)
+#### is always adjusted to reflect how Parsoid adds nowiki
+#### escape tags.
+####
+#### We are marking several tests as parsoid-only since the
+#### HTML in the result section is different from what the
+#### PHP parser generates.
+
#### --------------- Headings ---------------
#### 0. Unnested
#### 1. Nested inside html <h1>=foo=</h1>
@@ -12,9 +22,15 @@
!! input
<nowiki>=foo=</nowiki>
+<nowiki> =foo= </nowiki>
+<!--cmt-->
+<nowiki>=foo=</nowiki>
+
=foo''a''<nowiki>=</nowiki>
!! result
<p>=foo=
+</p><p> =foo=
+</p><p><!--cmt-->=foo=
</p><p>=foo<i>a</i>=
</p>
!!end
@@ -22,7 +38,7 @@
!! test
Headings: 1. Nested inside html
!! options
-disabled
+parsoid
!! input
=<nowiki>=foo=</nowiki>=
==<nowiki>=foo=</nowiki>==
@@ -42,7 +58,7 @@
!! test
Headings: 2. Outside heading nest on a single line <h1>foo</h1>*bar
!! options
-disabled
+parsoid
!! input
=foo=
<nowiki>*bar</nowiki>
@@ -59,7 +75,7 @@
!! test
Headings: 3. Nested inside html with wikitext split by html tags
!! options
-disabled
+parsoid
!! input
=='''bold'''<nowiki>foo=</nowiki>=
!! result
@@ -67,12 +83,14 @@
!!end
!! test
-Headings: 4. No escaping needed (testing just h1 and h2)
+Headings: 4a. No escaping needed (testing just h1 and h2)
!! options
-disabled
+parsoid
!! input
==foo=
=foo==
+= =foo= =
+==foo= bar=
===foo==
==foo===
=''=''foo==
@@ -80,23 +98,41 @@
!! result
<h1>=foo</h1>
<h1>foo=</h1>
+<h1> =foo= </h1>
+<h1>=foo= bar</h1>
<h2>=foo</h2>
<h2>foo=</h2>
<h1><i>=</i>foo=</h1>
<h1>=</h1>
+
+!!end
+
+!! test
+Headings: 4b. No escaping needed (inside p-tags)
+!! options
+parsoid
+!! input
+===
+=foo= x
+=foo= <s></s>
+!! result
+<p>===
+=foo= x
+=foo= <s></s>
+</p>
!!end
!! test
Headings: 5. Empty headings
!! options
-disabled
+parsoid
!! input
-=<nowiki></nowiki>=
-==<nowiki></nowiki>==
-===<nowiki></nowiki>===
-====<nowiki></nowiki>====
-=====<nowiki></nowiki>=====
-======<nowiki></nowiki>======
+=<nowiki/>=
+==<nowiki/>==
+===<nowiki/>===
+====<nowiki/>====
+=====<nowiki/>=====
+======<nowiki/>======
!! result
<h1></h1>
<h2></h2>
@@ -109,11 +145,13 @@
!! test
Headings: 6. Heading chars in SOL context
!! options
-disabled
+parsoid
!! input
<!--cmt--><nowiki>=h1=</nowiki>
+<!--cmt--><nowiki> =h1= </nowiki>
!! result
<p><!--cmt-->=h1=
+<!--cmt--> =h1=
</p>
!!end
@@ -129,7 +167,7 @@
#### ----------------------------------------
!! test
-Lists: 0. Outside nests
+Lists: 0. Outside nests
!! input
<nowiki>*foo</nowiki>
@@ -219,7 +257,7 @@
!! test
Lists: 4. No escapes needed
!! options
-disabled
+parsoid
!! input
*foo*bar
@@ -268,7 +306,7 @@
!! test
Lists: 6. Escape bullets in SOL position
!! options
-disabled
+parsoid
!! input
<!--cmt--><nowiki>*foo</nowiki>
!! result
@@ -294,14 +332,11 @@
!! test
HRs: 1. Single line
!! options
-disabled
+parsoid
!! input
-----
-<nowiki>----</nowiki>
-----
-<nowiki>=foo=</nowiki>
-----
-<nowiki>*foo</nowiki>
+----<nowiki>----</nowiki>
+----=foo=
+----*foo
!! result
<hr/>----
<hr/>=foo=
@@ -367,6 +402,8 @@
!! test
Tables: 2a. Nested in td
+!! options
+parsoid
!! input
{|
|<nowiki>foo|bar</nowiki>
@@ -380,6 +417,8 @@
!! test
Tables: 2b. Nested in td
+!! options
+parsoid
!! input
{|
|<nowiki>foo||bar</nowiki>
@@ -395,6 +434,8 @@
!! test
Tables: 2c. Nested in td -- no escaping needed
+!! options
+parsoid
!! input
{|
|foo!!bar
@@ -408,6 +449,8 @@
!! test
Tables: 3a. Nested in th
+!! options
+parsoid
!! input
{|
!foo!bar
@@ -421,6 +464,8 @@
!! test
Tables: 3b. Nested in th
+!! options
+parsoid
!! input
{|
!<nowiki>foo!!bar</nowiki>
@@ -433,7 +478,9 @@
!! end
!! test
-Tables: 3b. Nested in th -- no escaping needed
+Tables: 3c. Nested in th -- no escaping needed
+!! options
+parsoid
!! input
{|
!<nowiki>foo||bar</nowiki>
@@ -447,6 +494,8 @@
!! test
Tables: 4a. Escape -
+!! options
+parsoid
!! input
{|
|-
@@ -463,6 +512,8 @@
!! test
Tables: 4b. Escape +
+!! options
+parsoid
!! input
{|
|-
@@ -479,6 +530,8 @@
!! test
Tables: 4c. No escaping needed
+!! options
+parsoid
!! input
{|
|-
@@ -495,6 +548,31 @@
</tbody></table>
!! end
+### SSS FIXME: Disabled right now because accurate html2wt
+### on this snippet requires data-parsoid flags that we've
+### stripped out of these tests. We should work out how
+### we want to handle these kinds of tests that require
+### data-parsoid flags for accurate html2wt serialization
+
+!! test
+Tables: 4d. No escaping needed
+!! options
+disabled
+!! input
+{|
+||+1
+||-2
+|}
+!! result
+<table>
+<tr>
+<td>+1
+</td>
+<td>-2
+</td></tr></table>
+
+!! end
+
#### --------------- Links ---------------
#### 1. Quote marks in link text
#### 2. Wikilinks: Escapes needed
@@ -505,17 +583,17 @@
!! test
Links 1. Quote marks in link text
!! options
-disabled
+parsoid
!! input
[[Foo|<nowiki>Foo''boo''</nowiki>]]
!! result
-<a rel="mw:WikiLink" href="Foo"
data-parsoid="{"tsr":[0,7],"contentPos":[5,5],"src":"[[Foo]]","bsp":[0,7],"stx":"simple"}">Foo''boo''</a>
+<a rel="mw:WikiLink" href="Foo">Foo''boo''</a>
!! end
!! test
Links 2. WikiLinks: Escapes needed
!! options
-disabled
+parsoid
!! input
[[Foo|<nowiki>[Foobar]</nowiki>]]
[[Foo|<nowiki>Foobar]</nowiki>]]
@@ -543,7 +621,7 @@
!! test
Links 3. WikiLinks: No escapes needed
!! options
-disabled
+parsoid
!! input
[[Foo|[Foobar]]
[[Foo|foo|bar]]
@@ -555,7 +633,7 @@
!! test
Links 4. ExtLinks: Escapes needed
!! options
-disabled
+parsoid
!! input
[http://google.com <nowiki>[google]</nowiki>]
[http://google.com <nowiki>google]</nowiki>]
@@ -567,7 +645,7 @@
!! test
Links 5. ExtLinks: No escapes needed
!! options
-disabled
+parsoid
!! input
[http://google.com [google]
!! result
@@ -614,7 +692,7 @@
!! test
2. Link fragments inside <i> and <b>
-(FIXME: Escaping one or both of [[ and ]] is also acceptable --
+(FIXME: Escaping one or both of [[ and ]] is also acceptable --
this is one of the shortcomings of this format)
!! input
''[[foo''<nowiki>]]</nowiki>
@@ -657,7 +735,7 @@
!! test
1. Leading space in SOL context should be escaped
!! options
-disabled
+parsoid
!! input
<nowiki> foo</nowiki>
<!--cmt--><nowiki> foo</nowiki>
@@ -675,7 +753,7 @@
!! test
1. a tags
!! options
-disabled
+parsoid
!! input
<a href="http://google.com">google</a>
!! result
--
To view, visit https://gerrit.wikimedia.org/r/61038
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I7ac845caf98d9d125cb435c721dfc988fca52a25
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits