Santhosh has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/227652

Change subject: Improve reference adaptation to avoid parsoid errors
......................................................................

Improve reference adaptation to avoid parsoid errors

The current reference adaptation does not guarantee that the adapted
reference has a valid data-mw attribute. Parsoid demands a valid id
in data-mw.body pointing to an item in the references section. Not having
one causes Parsoid errors during HTML to wikitext conversion. It does not
cause Parsoid to fail, but it generates articles with errors, as shown in
Bug T107252.

This patch improves and cleans up the parsoid adaptation process.
It also makes sure a valid data-mw is present in the adapted reference.

Testplan:
Translate Vaporwave article from English to Spanish. Use MT for all
sections and publish. The published page should not have any errors about
references. Compare it with 
https://es.wikipedia.org/w/index.php?title=Vaporwave&oldid=84053572

Make sure reference card is working for all references and adding reference at
cursor position also works.
Repeat the test for translating Attack & Release article from en to es
and compare result with 
https://es.wikipedia.org/w/index.php?title=Attack_%26_Release&oldid=84052884

Bug: T107252
Change-Id: I0ab01b682a54ac3a96a52068740af85964e99807
---
M modules/tools/ext.cx.tools.reference.js
1 file changed, 43 insertions(+), 55 deletions(-)


  git pull 
ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/ContentTranslation 
refs/changes/52/227652/1

diff --git a/modules/tools/ext.cx.tools.reference.js 
b/modules/tools/ext.cx.tools.reference.js
index 901eb94..2169885 100644
--- a/modules/tools/ext.cx.tools.reference.js
+++ b/modules/tools/ext.cx.tools.reference.js
@@ -128,7 +128,7 @@
         * See https://phabricator.wikimedia.org/T88290
         * and 
https://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec#Ref_and_References
         * @param {string} referenceId The reference element Identifier.
-        * @return {string} The HTML content of the reference.
+        * @return {string|null} The HTML content of the reference.
         */
        ReferenceCard.prototype.getReferenceContent = function ( referenceId ) {
                var reference, referenceContentElement;
@@ -138,7 +138,7 @@
                        return null;
                }
                // Support traditional reference handling by Parsoid
-               if ( reference.body.html ) {
+               if ( reference.body && reference.body.html ) {
                        return reference.body.html;
                }
 
@@ -226,28 +226,11 @@
        /**
         * For the given reference id, get the reference data
         *
-        * This is very easy in the cases one reference used only once in the 
page.
-        * Such a reference link will have data-mw carrying the data.
-        *
-        * But when the same reference used in multiple places, this is tricky.
-        * The page will have the following markup(example) at the end of page, 
saying all these 3
-        * references are same. One of these links will have reference data. 
Not necessarily
-        * first one. So we have to iterate through all these siblings and find 
which one has
-        * reference data.
-        *
-        * Example from Antipasto article from eswiki:
-        * <li about="#cite_note-Jöel-1" data-parsoid="{}" id="130">
-        * <span data-parsoid="{}" rel="mw:referencedBy">↑
-        * <a class="cx-link" data-linkid="132" data-parsoid="{}" 
href="#cite_ref-Jöel-1-0">1.0</a>
-        * <a class="cx-link" data-linkid="133" data-parsoid="{}" 
href="#cite_ref-Jöel-1-1">1.1</a>
-        * <a class="cx-link" data-linkid="134" data-parsoid="{}" 
href="#cite_ref-Jöel-1-2">1.2</a>
-        * </span>
-        * </li>
         * @param {string} referenceId
         * @return {Object|null}
         */
        ReferenceCard.prototype.getReferenceData = function ( referenceId ) {
-               var $sourceReference, i, mwData, $sibling, $referenceSiblings, 
id;
+               var $sourceReference;
 
                $sourceReference = $( document.getElementById( referenceId ) );
                if ( !$sourceReference.is( '[typeof*="mw:Extension/ref"]' ) ) {
@@ -255,22 +238,7 @@
                        return null;
                }
 
-               $referenceSiblings = $( '[typeof*="mw:Extension/references"]' )
-                       .find( 'a[href="#' + referenceId + '"]' )
-                       .siblings()
-                       .addBack(); // Including self
-
-               for ( i = 0; i < $referenceSiblings.length; i++ ) {
-                       id = $( $referenceSiblings[ i ] ).attr( 'href' 
).replace( '#', '' );
-                       $sibling = $( document.getElementById( id ) );
-                       mwData = $sibling.data( 'mw' );
-                       if ( mwData && mwData.body ) {
-                               return mwData;
-                       }
-               }
-
-               // Did not see a case where we still not find reference data, 
but...
-               return null;
+               return $sourceReference.data( 'mw' );
        };
 
        /**
@@ -278,36 +246,56 @@
         * See 
https://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec#Ref_and_References
         * We copy the data-mw that was adapted using the template 
configuration at
         * ext.cx.source.filter.js#adaptTemplate to the $references' 
mwData.body.html
+        *
         * @param {string} referenceId
         */
        ReferenceCard.prototype.adaptReference = function ( referenceId ) {
                var $referenceContent, $targetReference,
-                       mwData, adaptedData;
+                       mwData;
 
                $targetReference = $( document.getElementById( 'cx' + 
referenceId ) );
                mwData = this.getReferenceData( referenceId );
-               if ( !mwData || !mwData.body ) {
+               if ( !mwData ) {
+                       // This is almost impossible. Since every reference 
will have mw-data as
+                       // Parsoid DOM Spec
                        return;
                }
+               mwData.body = mwData.body || {};
+               if ( !mwData.body.id ) {
+                       /*
+                       Every reference must have a data-mw.body with id 
pointing to the item
+                       in References section. In general, we can just get copy 
the data-mw
+                       from the source reference. But there are cases it wont 
be filled in source reference.
+                       Example: When reference is reused more than once, the 
second reference might not have
+                       the data-mw.body.id. We need to find that id by looking 
at references section.
+                       To understand this, consider this example reference.
 
-               $referenceContent = $( '<div>' ).html( mwData.body.html );
-               /*
-               Reference template expands in references section as below
-               <ol about="#mwt11" typeof="mw:Extension/references">
-                       <li about="#cite_note-1" id="cite_note-1">
-                               <span rel="mw:referencedBy">
-                                       <a href="#cite_ref-1-0">↑</a>
+                       <span about="#mwt6" class="reference" 
id="cite_ref-three_3-0" rel="dc:references" typeof="mw:Extension/Ref"
+                           data-mw='{"name": "ref", "attrs": {"name": 
"three"}, "body":{"id":"mw-reference-text-cite_three-3"}}'>
+                         <a href="#cite_note-three-3">[3]</a>
+                       </span>
+                       When reused, note that body.id is missing.
+                       <span about="#mwt8" class="reference" 
id="cite_ref-three_3-1" rel="dc:references" typeof="mw:Extension/ref"
+                           data-mw='{"name":"ref", "attrs":{"name":"three"}}'>
+                         <a href="#cite_note-three-3">[3]</a>
+                       </span>
+                       Reference template expands in references section as 
below
+                       <ol about="#mwt11" typeof="mw:Extension/references">
+                       <li about="#cite_note-three-3" id="cite_note-three-3">
+                               <span rel="mw:referencedBy">↑
+                                       <a href="#cite_ref-three_3-0">3.0</a>
+                                       <a href="#cite_ref-three_3-1">3.1</a>
                                </span>
-                               <span>Reference content html goes here</span>
-               </li>
-               */
-               adaptedData = $( '[typeof*="mw:Extension/references"]' )
-                       .find( 'a[href="#' + referenceId + '"]' )
-                       .parent()
-                       .next()
-                       .data( 'mw' );
-               $referenceContent.children().attr( 'data-mw', JSON.stringify( 
adaptedData ) );
-               mwData.body.html = $referenceContent.html();
+                               <span id="mw-reference-text-cite_note-three-3" 
class="mw-reference-text" data-parsoid="{}">Three</span>
+                               </li>
+                       </li>
+                       */
+                       $referenceContent = $( 
'[typeof*="mw:Extension/references"]' )
+                               .find( 'a[href="#' + referenceId + '"]' )
+                               .closest( 'li' )
+                               .find( '.mw-reference-text' );
+                       mwData.body.id = $referenceContent.prop( 'id' );
+               }
                $targetReference.attr( 'data-mw', JSON.stringify( mwData ) );
                this.addReferenceList();
        };

-- 
To view, visit https://gerrit.wikimedia.org/r/227652
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I0ab01b682a54ac3a96a52068740af85964e99807
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/ContentTranslation
Gerrit-Branch: master
Gerrit-Owner: Santhosh <santhosh.thottin...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to