Hiong3-eng5 has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/93922


Change subject: API: added batch process to add text annotation
......................................................................

API: added batch process to add text annotation

plus some restructuring of output

Change-Id: Iac98b20cef0f69af7f40eef172bb5634403c6073
---
M includes/api/owAddAnnotation.php
1 file changed, 173 insertions(+), 15 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/WikiLexicalData refs/changes/22/93922/1

diff --git a/includes/api/owAddAnnotation.php b/includes/api/owAddAnnotation.php
index a00f1b6..e30a08a 100644
--- a/includes/api/owAddAnnotation.php
+++ b/includes/api/owAddAnnotation.php
@@ -17,6 +17,7 @@
 
        public function execute() {
 
+               $this->wikipage = false;
                $typeExist = 0;
                $result = '';
 
@@ -44,6 +45,15 @@
 
                // The Type of Annotation
                if ( $params['type'] == 'text') {
+
+                       // If wikipage, use batch processing
+                       if ( $params['wikipage'] ) {
+                               $this->wikipage = true;
+                               $text = $this->processBatchText( 
$params['wikipage'] );
+                               return true;
+                       }
+
+                       // if not, add just one syntrans
 
                        // Parameter checks
 
@@ -319,6 +329,9 @@
                        'dm_relation' => array (
                                ApiBase::PARAM_TYPE => 'integer'
                        ),
+                       'wikipage' => array (
+                               ApiBase::PARAM_TYPE => 'string',
+                       ),
                        'test' => array (
                                ApiBase::PARAM_TYPE => 'string'
                        )
@@ -343,6 +356,7 @@
                                for defined meaning relations or a defined 
meaning id
                                for syntrans relations between two syntrans with
                                different defined meaning ids",
+                       'wikipage' => 'The wikipage to process. (tsv format, 
using wiki page)',
                        'test' => 'test mode. No changes are made.'
                );
        }
@@ -351,15 +365,27 @@
        public function getExamples() {
                return array(
                        'Add text type syntrans annotation',
-                       
'api.php?action=ow_add_annotation&type=text&e=acusar&lang=spa&dm=837820&attribute=hyphenation&attrib_lang=eng&text=a·cu·sar&format=xml',
-                       'or to test it',
-                       
'api.php?action=ow_add_annotation&type=text&e=acusar&lang=spa&dm=837820&attribute=hyphenation&attrib_lang=eng&text=a·cu·sar&format=xml&test',
+                       ' 
api.php?action=ow_add_annotation&type=text&e=acusar&lang=spa&dm=837820&attribute=hyphenation&attrib_lang=eng&text=a·cu·sar&format=xml',
+                       ' or to test it',
+                       ' 
api.php?action=ow_add_annotation&type=text&e=acusar&lang=spa&dm=837820&attribute=hyphenation&attrib_lang=eng&text=a·cu·sar&format=xml&test',
                        'Add text type defined meaning annotation',
-                       
'api.php?action=ow_add_annotation&type=text&dm=2024&attribute=chemical%20symbol&attrib_lang=eng&text=Fe&format=xml',
-                       
'api.php?action=ow_add_annotation&type=text&dm=2024&attribute=atomic%20number&attrib_lang=eng&text=26&format=xml',
-                       'or to test it',
-                       
'api.php?action=ow_add_annotation&type=text&dm=2024&attribute=chemical%20symbol&attrib_lang=eng&text=Fe&format=xml&test',
-                       
'api.php?action=ow_add_annotation&type=text&dm=2024&attribute=atomic%20number&attrib_lang=eng&text=26&format=xml&test',
+                       ' 
api.php?action=ow_add_annotation&type=text&dm=2024&attribute=chemical%20symbol&attrib_lang=eng&text=Fe&format=xml',
+                       ' 
api.php?action=ow_add_annotation&type=text&dm=2024&attribute=atomic%20number&attrib_lang=eng&text=26&format=xml',
+                       ' or to test it',
+                       ' 
api.php?action=ow_add_annotation&type=text&dm=2024&attribute=chemical%20symbol&attrib_lang=eng&text=Fe&format=xml&test',
+                       ' 
api.php?action=ow_add_annotation&type=text&dm=2024&attribute=atomic%20number&attrib_lang=eng&text=26&format=xml&test',
+                       'You can also add synonym/translation annotations using 
a TSV file format saved in a Wiki Page.  The file must ',
+                       'contain 6 columns:',
+                       ' defined_meaning_id    (int)',
+                       ' attribute             (string)',
+                       ' attribute language_id (int)',
+                       ' text                  (string)',
+                       ' expression            (string)',
+                       ' language_id           (int)',
+                       '  
api.php?action=ow_add_annotation&type=text&wikipage=User:Minnan.import.bot/addTextAnnotation&dm=0&format=xml',
+                       '  or to test it',
+                       '  
api.php?action=ow_add_annotation&type=text&wikipage=User:Minnan.import.bot/addTextAnnotationTest&dm=0&format=xml&test',
+                       '',
                        'Add option type syntrans annotation',
                        
'api.php?action=ow_add_annotation&type=option&e=acusar&lang=spa&dm=837820&attribute=part%20of%20speech&attrib_lang=eng&option=verb&option_lang=eng&format=xml',
                        
'api.php?action=ow_add_annotation&type=option&e=case&lang=eng&dm=7367&attribute=usage&attrib_lang=eng&option=colloquial&option_lang=eng&format=xml',
@@ -381,7 +407,120 @@
                );
        }
 
-       private function processAddTextAttributeValues($spelling = null, 
$language = null, $definedMeaningId, $attribute, $attribLang, $text) {
+       public function processBatchText( $wikiPage ) {
+               global $params;
+
+               $csvWikiPageTitle = Title::newFromText( $wikiPage );
+               $csvWikiPage = new WikiPage ( $csvWikiPageTitle );
+
+               if ( !$wikiText = $csvWikiPage->getContent( Revision::RAW ) ) {
+                       return $this->getResult()->addValue( null, 
$this->getModuleName(),
+                               array ( 'result' => array (
+                                       'error' => "WikiPage ( 
$csvWikiPageTitle ) does not exist"
+                               ) )
+                       );
+               }
+
+               $text = $wikiText->mText;
+
+               // Check if the page is redirected,
+               // then adjust accordingly.
+               preg_match( "/REDIRECT \[\[(.+)\]\]/", $text, $match2 );
+               if ( isset($match2[1]) ) {
+                       $redirectedText = $match2[1];
+                       $csvWikiPageTitle = Title::newFromText( $redirectedText 
);
+                       $csvWikiPage = new WikiPage ( $csvWikiPageTitle );
+                       $wikiText = $csvWikiPage->getContent( Revision::RAW );
+                       $text = $wikiText->mText;
+               }
+
+               $process = array (
+                       'text' =>  'wikipage',
+                       'type' => 'batch processing',
+               );
+
+               $testrun = array();
+               if ( $this->test ) {
+                       $process['note'] = 'test run only';
+               }
+
+               $this->getResult()->addValue( null, $this->getModuleName(),
+                       array ( 'process' => $process
+                       )
+               );
+
+               $inputLine = explode( "\n", $text );
+               $ctr = 0;
+               foreach ( $inputLine as $inputData ) {
+                       $this->continue = true;
+                       $ctr++;
+                       $inputData = trim( $inputData );
+
+                       // Check if TSV
+                       $inputMatch = preg_match("/     /", $inputData, $match 
);
+
+                       if ( $inputMatch and $this->continue ) {
+                               $inputData = explode( " ", $inputData );
+                               $inputDataCount = count( $inputData );
+
+                               if ( $inputDataCount == 4 and $this->continue ) 
{
+                                       $inputData[] = null;
+                                       $inputData[] = null;
+                                       $inputDataCount = count( $inputData );
+                               }
+
+                               if ( $inputDataCount < 6 OR $inputDataCount > 6 
and $this->continue ) {
+                                       $result = array ( 'note' => "invalid 
column count. {$inputDataCount} instead of 6");
+                                       $this->getResult()->addValue( null, 
$this->getModuleName(),
+                                               array ( 'result' . $ctr => 
$result )
+                                       );
+                                       $this->continue = false;
+                               }
+                               if ( $this->continue ) {
+                                       $definedMeaningId = $inputData[0];
+                                       $attribute = preg_replace( '/(^"|"$)/', 
'', $inputData[1] );
+                                       $attribLang = preg_replace( 
'/(^"|"$)/', '', $inputData[2] );
+                                       $text = preg_replace( '/(^"|"$)/', '', 
$inputData[3] );
+                                       $spelling = preg_replace( '/(^"|"$)/', 
'', $inputData[4] );
+                                       $language = preg_replace( '/(^"|"$)/', 
'', $inputData[5] );
+                               }
+                       } else {
+                               if ( $inputData == null ) {
+                                       $result = array ( 'note' => "skipped 
blank line");
+                                       $this->getResult()->addValue( null, 
$this->getModuleName(),
+                                               array ( 'result' . $ctr => 
$result )
+                                       );
+                                       $this->continue = false;
+                               } else {
+                                       $result = array ( 'note' => "non TSV 
line `{$inputData}`");
+                                       $this->getResult()->addValue( null, 
$this->getModuleName(),
+                                               array ( 'result' . $ctr => 
$result )
+                                       );
+                                       $this->continue = false;
+                               }
+                       }
+
+                       if ( $this->continue ) {
+                               if ( !is_numeric( $definedMeaningId ) ) {
+                                       if ($ctr == 1) {
+                                               $result = array ( 'note' => 
"$definedMeaningId is not an int or probably just the CSV header");
+                                       } else {
+                                               $result = array ( 'note' => 
"$definedMeaningId is not an int");
+                                       }
+                               } else {
+                                       $result = 
$this->processAddTextAttributeValues( $spelling, $language, $definedMeaningId, 
$attribute, $attribLang, $text );
+                               }
+
+                               $this->getResult()->addValue( null, 
$this->getModuleName(),
+                                       array ( 'result' . $ctr => $result )
+                               );
+                       }
+
+               }
+               return true;
+       }
+
+       private function processAddTextAttributeValues( $spelling = null, 
$language = null, $definedMeaningId, $attribute, $attribLang, $text ) {
                $dc = wdGetDataSetContext();
 
                // if spelling is not null, process object as syntrans
@@ -446,6 +585,16 @@
                                )
                        );
                }
+
+               // get DM expression for clarity
+               $definedMeaningLanguageId = WLD_ENGLISH_LANG_ID;
+               $syntrans = null;
+               if ( $spelling ) {
+                       $definedMeaningLanguageId = $languageId;
+                       $syntrans = "to expression:`{$spelling}` ";
+               }
+               $expression = getDefinedMeaningSpellingForLanguage( 
$definedMeaningId, $definedMeaningLanguageId );
+
                // Add values if does not exist
                $valueId = getTextAttributeValueId( $this->objectId, 
$this->attributeId, $text );
                if ( !$valueId ) {
@@ -453,23 +602,32 @@
                                startNewTransaction( $this->getUser()->getID(), 
"0.0.0.0", "Added using API function add_annotation", $dc);
                                $valueId = addTextAttributeValue( 
$this->objectId, $this->attributeId, $text );
                        }
-                       $note['result'] = array(
+                       $note = array(
                                'status' => 'added'
                        );
 
-                       if ( $value_id ) {
-                               $note['result']['value_id'] = $valueId;
+                       if ( $valueId ) {
+                               $note['value_id'] = $valueId;
                        }
 
-                       if ( $this->test ) {
+                       if ( $this->wikipage ) {
+                               $note['note'] = "{$attribute} `{$text}` 
{$syntrans}for concept {$expression}({$definedMeaningId})";
+                       }
+
+                       if ( $this->test && !$this->wikipage ) {
                                $note['note'] = 'test run only';
                        }
                } else {
-                       $note['result'] = array(
+                       $note = array(
                                'status' => 'exists',
                                'value_id' => $valueId
                        );
-                       if ( $this->test ) {
+
+                       if ( $this->wikipage ) {
+                               $note['note'] = "{$attribute} `{$text}` 
{$syntrans}for concept {$expression}({$definedMeaningId})";
+                       }
+
+                       if ( $this->test && !$this->wikipage ) {
                                $note['note'] = 'test run only';
                        }
                }

-- 
To view, visit https://gerrit.wikimedia.org/r/93922
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iac98b20cef0f69af7f40eef172bb5634403c6073
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/WikiLexicalData
Gerrit-Branch: master
Gerrit-Owner: Hiong3-eng5 <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to