Hiong3-eng5 has uploaded a new change for review.
https://gerrit.wikimedia.org/r/93922
Change subject: API: added batch process to add text annotation
......................................................................
API: added batch process to add text annotation
plus some restructuring of output
Change-Id: Iac98b20cef0f69af7f40eef172bb5634403c6073
---
M includes/api/owAddAnnotation.php
1 file changed, 173 insertions(+), 15 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/WikiLexicalData refs/changes/22/93922/1
diff --git a/includes/api/owAddAnnotation.php b/includes/api/owAddAnnotation.php
index a00f1b6..e30a08a 100644
--- a/includes/api/owAddAnnotation.php
+++ b/includes/api/owAddAnnotation.php
@@ -17,6 +17,7 @@
public function execute() {
+ $this->wikipage = false;
$typeExist = 0;
$result = '';
@@ -44,6 +45,15 @@
// The Type of Annotation
if ( $params['type'] == 'text') {
+
+ // If wikipage, use batch processing
+ if ( $params['wikipage'] ) {
+ $this->wikipage = true;
+ $text = $this->processBatchText(
$params['wikipage'] );
+ return true;
+ }
+
+ // if not, add just one syntrans
// Parameter checks
@@ -319,6 +329,9 @@
'dm_relation' => array (
ApiBase::PARAM_TYPE => 'integer'
),
+ 'wikipage' => array (
+ ApiBase::PARAM_TYPE => 'string',
+ ),
'test' => array (
ApiBase::PARAM_TYPE => 'string'
)
@@ -343,6 +356,7 @@
for defined meaning relations or a defined
meaning id
for syntrans relations between two syntrans with
different defined meaning ids",
+ 'wikipage' => 'The wikipage to process. (tsv format,
using wiki page)',
'test' => 'test mode. No changes are made.'
);
}
@@ -351,15 +365,27 @@
public function getExamples() {
return array(
'Add text type syntrans annotation',
-
'api.php?action=ow_add_annotation&type=text&e=acusar&lang=spa&dm=837820&attribute=hyphenation&attrib_lang=eng&text=a·cu·sar&format=xml',
- 'or to test it',
-
'api.php?action=ow_add_annotation&type=text&e=acusar&lang=spa&dm=837820&attribute=hyphenation&attrib_lang=eng&text=a·cu·sar&format=xml&test',
+ '
api.php?action=ow_add_annotation&type=text&e=acusar&lang=spa&dm=837820&attribute=hyphenation&attrib_lang=eng&text=a·cu·sar&format=xml',
+ ' or to test it',
+ '
api.php?action=ow_add_annotation&type=text&e=acusar&lang=spa&dm=837820&attribute=hyphenation&attrib_lang=eng&text=a·cu·sar&format=xml&test',
'Add text type defined meaning annotation',
-
'api.php?action=ow_add_annotation&type=text&dm=2024&attribute=chemical%20symbol&attrib_lang=eng&text=Fe&format=xml',
-
'api.php?action=ow_add_annotation&type=text&dm=2024&attribute=atomic%20number&attrib_lang=eng&text=26&format=xml',
- 'or to test it',
-
'api.php?action=ow_add_annotation&type=text&dm=2024&attribute=chemical%20symbol&attrib_lang=eng&text=Fe&format=xml&test',
-
'api.php?action=ow_add_annotation&type=text&dm=2024&attribute=atomic%20number&attrib_lang=eng&text=26&format=xml&test',
+ '
api.php?action=ow_add_annotation&type=text&dm=2024&attribute=chemical%20symbol&attrib_lang=eng&text=Fe&format=xml',
+ '
api.php?action=ow_add_annotation&type=text&dm=2024&attribute=atomic%20number&attrib_lang=eng&text=26&format=xml',
+ ' or to test it',
+ '
api.php?action=ow_add_annotation&type=text&dm=2024&attribute=chemical%20symbol&attrib_lang=eng&text=Fe&format=xml&test',
+ '
api.php?action=ow_add_annotation&type=text&dm=2024&attribute=atomic%20number&attrib_lang=eng&text=26&format=xml&test',
+ 'You can also add synonym/translation annotations using
a TSV file format saved in a Wiki Page. The file must ',
+ 'contain 6 columns:',
+ ' defined_meaning_id (int)',
+ ' attribute (string)',
+ ' attribute language_id (int)',
+ ' text (string)',
+ ' expression (string)',
+ ' language_id (int)',
+ '
api.php?action=ow_add_annotation&type=text&wikipage=User:Minnan.import.bot/addTextAnnotation&dm=0&format=xml',
+ ' or to test it',
+ '
api.php?action=ow_add_annotation&type=text&wikipage=User:Minnan.import.bot/addTextAnnotationTest&dm=0&format=xml&test',
+ '',
'Add option type syntrans annotation',
'api.php?action=ow_add_annotation&type=option&e=acusar&lang=spa&dm=837820&attribute=part%20of%20speech&attrib_lang=eng&option=verb&option_lang=eng&format=xml',
'api.php?action=ow_add_annotation&type=option&e=case&lang=eng&dm=7367&attribute=usage&attrib_lang=eng&option=colloquial&option_lang=eng&format=xml',
@@ -381,7 +407,120 @@
);
}
- private function processAddTextAttributeValues($spelling = null,
$language = null, $definedMeaningId, $attribute, $attribLang, $text) {
+	/**
+	 * Batch-add text annotations from a TSV table stored in a wiki page.
+	 *
+	 * Every line of the page is handled independently and reported in the
+	 * API result under the key "result<N>", where N is the 1-based line
+	 * number (blank and malformed lines are counted and reported too).
+	 *
+	 * Expected columns, tab separated:
+	 *   defined_meaning_id, attribute, attribute language, text,
+	 *   expression, language
+	 * A row with exactly 4 columns is accepted as well; the two trailing
+	 * columns (expression, language) are then passed on as empty strings.
+	 * One leading and one trailing double quote are stripped from every
+	 * column except the defined meaning id.
+	 *
+	 * @param string $wikiPage Title of the wiki page holding the TSV data.
+	 *   If that page is a redirect, the redirect target is read instead
+	 *   (one hop only).
+	 * @return mixed true once all lines were processed; if the page (or its
+	 *   redirect target) cannot be loaded, an error entry is added to the
+	 *   API result and the return value of ApiResult::addValue() is returned.
+	 */
+	public function processBatchText( $wikiPage ) {
+		$apiResult = $this->getResult();
+		$moduleName = $this->getModuleName();
+
+		// Title::newFromText() returns null for an invalid title; never hand
+		// that to WikiPage.
+		$csvWikiPageTitle = Title::newFromText( $wikiPage );
+		$content = null;
+		if ( $csvWikiPageTitle ) {
+			$csvWikiPage = new WikiPage( $csvWikiPageTitle );
+			$content = $csvWikiPage->getContent( Revision::RAW );
+
+			// Check if the page is a redirect, then adjust accordingly.
+			// Asking the content object avoids pattern matching on the raw
+			// text, which could also fire on a data cell containing "REDIRECT [[".
+			$redirectTitle = $content ? $content->getRedirectTarget() : null;
+			if ( $redirectTitle ) {
+				$csvWikiPageTitle = $redirectTitle;
+				$csvWikiPage = new WikiPage( $csvWikiPageTitle );
+				$content = $csvWikiPage->getContent( Revision::RAW );
+			}
+		}
+
+		if ( !$content ) {
+			// Fall back to the raw request value when no Title could be built.
+			$pageLabel = $csvWikiPageTitle ? $csvWikiPageTitle : $wikiPage;
+			return $apiResult->addValue( null, $moduleName,
+				array( 'result' => array(
+					'error' => "WikiPage ( $pageLabel ) does not exist"
+				) )
+			);
+		}
+
+		// Public accessor instead of reaching into the content object's
+		// internal mText property; the cast covers non-text content (null).
+		$pageText = (string)ContentHandler::getContentText( $content );
+
+		$process = array(
+			'text' => 'wikipage',
+			'type' => 'batch processing',
+		);
+		if ( $this->test ) {
+			$process['note'] = 'test run only';
+		}
+		$apiResult->addValue( null, $moduleName, array( 'process' => $process ) );
+
+		$lineNumber = 0;
+		foreach ( explode( "\n", $pageText ) as $line ) {
+			$lineNumber++;
+			$resultKey = 'result' . $lineNumber;
+			$line = trim( $line );
+
+			if ( $line === '' ) {
+				$apiResult->addValue( null, $moduleName,
+					array( $resultKey => array( 'note' => "skipped blank line" ) )
+				);
+				continue;
+			}
+
+			// Check if TSV: a data line must contain at least one tab.
+			if ( strpos( $line, "\t" ) === false ) {
+				$apiResult->addValue( null, $moduleName,
+					array( $resultKey => array( 'note' => "non TSV line `{$line}`" ) )
+				);
+				continue;
+			}
+
+			$columns = explode( "\t", $line );
+			if ( count( $columns ) === 4 ) {
+				// Defined-meaning annotation: no expression / language given.
+				$columns = array_pad( $columns, 6, '' );
+			}
+
+			$columnCount = count( $columns );
+			if ( $columnCount !== 6 ) {
+				$apiResult->addValue( null, $moduleName,
+					array( $resultKey => array(
+						'note' => "invalid column count. {$columnCount} instead of 6"
+					) )
+				);
+				continue;
+			}
+
+			// Strip one enclosing double quote on each side of every column
+			// except the defined meaning id.
+			for ( $i = 1; $i < 6; $i++ ) {
+				$columns[$i] = preg_replace( '/(^"|"$)/', '', $columns[$i] );
+			}
+			list( $definedMeaningId, $attribute, $attribLang, $annotationText,
+				$spelling, $language ) = $columns;
+
+			// The id must be a plain non-negative integer; is_numeric() would
+			// also let floats and exponent notation through.
+			if ( !ctype_digit( $definedMeaningId ) ) {
+				if ( $lineNumber === 1 ) {
+					$result = array( 'note' =>
+						"$definedMeaningId is not an int or probably just the CSV header" );
+				} else {
+					$result = array( 'note' => "$definedMeaningId is not an int" );
+				}
+			} else {
+				$result = $this->processAddTextAttributeValues(
+					$spelling, $language, $definedMeaningId,
+					$attribute, $attribLang, $annotationText
+				);
+			}
+
+			$apiResult->addValue( null, $moduleName, array( $resultKey => $result ) );
+		}
+
+		return true;
+	}
+
+ private function processAddTextAttributeValues( $spelling = null,
$language = null, $definedMeaningId, $attribute, $attribLang, $text ) {
$dc = wdGetDataSetContext();
// if spelling is not null, process object as syntrans
@@ -446,6 +585,16 @@
)
);
}
+
+ // get DM expression for clarity
+ $definedMeaningLanguageId = WLD_ENGLISH_LANG_ID;
+ $syntrans = null;
+ if ( $spelling ) {
+ $definedMeaningLanguageId = $languageId;
+ $syntrans = "to expression:`{$spelling}` ";
+ }
+ $expression = getDefinedMeaningSpellingForLanguage(
$definedMeaningId, $definedMeaningLanguageId );
+
// Add values if does not exist
$valueId = getTextAttributeValueId( $this->objectId,
$this->attributeId, $text );
if ( !$valueId ) {
@@ -453,23 +602,32 @@
startNewTransaction( $this->getUser()->getID(),
"0.0.0.0", "Added using API function add_annotation", $dc);
$valueId = addTextAttributeValue(
$this->objectId, $this->attributeId, $text );
}
- $note['result'] = array(
+ $note = array(
'status' => 'added'
);
- if ( $value_id ) {
- $note['result']['value_id'] = $valueId;
+ if ( $valueId ) {
+ $note['value_id'] = $valueId;
}
- if ( $this->test ) {
+ if ( $this->wikipage ) {
+ $note['note'] = "{$attribute} `{$text}`
{$syntrans}for concept {$expression}({$definedMeaningId})";
+ }
+
+ if ( $this->test && !$this->wikipage ) {
$note['note'] = 'test run only';
}
} else {
- $note['result'] = array(
+ $note = array(
'status' => 'exists',
'value_id' => $valueId
);
- if ( $this->test ) {
+
+ if ( $this->wikipage ) {
+ $note['note'] = "{$attribute} `{$text}`
{$syntrans}for concept {$expression}({$definedMeaningId})";
+ }
+
+ if ( $this->test && !$this->wikipage ) {
$note['note'] = 'test run only';
}
}
--
To view, visit https://gerrit.wikimedia.org/r/93922
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Iac98b20cef0f69af7f40eef172bb5634403c6073
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/WikiLexicalData
Gerrit-Branch: master
Gerrit-Owner: Hiong3-eng5 <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits