Divec has uploaded a new change for review.
https://gerrit.wikimedia.org/r/119721
Change subject: Fix segmentation of lists and tables
......................................................................
Fix segmentation of lists and tables
Change segmentation to work at the level of individual list items and
table cells. Previously, segmentation worked at the level of the whole
list/table, which produced illegal span children in table/ul nodes.
(This commit does not fix illegal HTML from sentences that are left with
unbalanced tags after segmentation, e.g. "He says <i>Stop! Hammer time</i> halfway through.")
Change-Id: I836b0c9f1c2680180f564accef63ce7b71974771
---
M server/segmentation/languages/CXParser.js
1 file changed, 3 insertions(+), 2 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/ContentTranslation refs/changes/21/119721/1
diff --git a/server/segmentation/languages/CXParser.js b/server/segmentation/languages/CXParser.js
index 429d559..4b21306 100644
--- a/server/segmentation/languages/CXParser.js
+++ b/server/segmentation/languages/CXParser.js
@@ -131,7 +131,7 @@
CXParser.prototype.onopentag = function ( tag ) {
var attrName,
attributes,
- section = /[ph1-6]|figure|ul|div|table/;
+ section = /[ph1-6]|figure|div|td|th|li/;
if ( tag.name === 'a' && !this.inSentence ) {
// sentences starting with a link
@@ -176,7 +176,8 @@
* @param {string} tag
*/
CXParser.prototype.onclosetag = function ( tag ) {
- var section = /[ph1-6]|figure|ul|div|table/;
+ var section = /[ph1-6]|figure|div|td|th|li/;
+
if ( tag.match( section ) ) {
if ( this.inSentence ) {
// Avoid dangling sentence.
--
To view, visit https://gerrit.wikimedia.org/r/119721
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I836b0c9f1c2680180f564accef63ce7b71974771
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/ContentTranslation
Gerrit-Branch: master
Gerrit-Owner: Divec <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits