http://www.mediawiki.org/wiki/Special:Code/MediaWiki/70056

Revision: 70056
Author:   dfinzer
Date:     2010-07-27 22:57:49 +0000 (Tue, 27 Jul 2010)

Log Message:
-----------
Script to merge page history XML files (with overlapping time periods) to 
produce full page history

Added Paths:
-----------
    trunk/tools/analysis/StreamingXMLConcat.php

Added: trunk/tools/analysis/StreamingXMLConcat.php
===================================================================
--- trunk/tools/analysis/StreamingXMLConcat.php                         (rev 0)
+++ trunk/tools/analysis/StreamingXMLConcat.php 2010-07-27 22:57:49 UTC (rev 70056)
@@ -0,0 +1,83 @@
+#!/usr/bin/php -q
+
+<?php
+
+// Set the default timezone used by PHP's date functions to UTC.
+date_default_timezone_set("UTC");
+
+// The script name plus exactly three arguments are required:
+// the past-history file, the recent-history file and the output file.
+$argumentCount = count($argv);
+if ($argumentCount !== 4) {
+       echo "\n\tUsage: ".$argv[0]." <pasthist> <recenthist> <output> \n";
+       exit(-1);
+}
+
+// Hand the three paths to the merger and run it.
+$concat = new XMLHistoryConcat($argv[1], $argv[2], $argv[3]);
+$concat->run();
+
+/**
+ * Merges two page-history XML files that cover overlapping time periods
+ * into a single <page> document.
+ *
+ * Revisions are copied from the older ("past") file up to, but not including,
+ * the revision whose id equals the id of the first revision in the newer
+ * ("recent") file; every revision of the recent file is then appended.
+ */
+class XMLHistoryConcat{
+
+       //path of the XML file holding the older part of the history
+       public $pastFileName;
+       //path of the XML file holding the newer part of the history
+       public $recentFileName;
+       //path of the merged file written by run()
+       public $outputFile;
+       //outer XML of every <revision> of the recent file, one per line
+       public $newContent = "";
+       //id (as a string) of the first <revision> of the recent file; null until one is found
+       public $id;
+
+       /**
+        * @param string $pastFN   path of the older history file
+        * @param string $recentFN path of the newer history file
+        * @param string $outputFN path of the file to write
+        */
+       public function __construct($pastFN, $recentFN, $outputFN){
+               $this->pastFileName = $pastFN;
+               $this->recentFileName = $recentFN;
+               $this->outputFile = $outputFN;
+       }
+
+       /**
+        * Collects the new content and sets the overlap revision id.
+        *
+        * Appends the outer XML of every <revision> element of $recentFN to
+        * $this->newContent and stores the id of the first one in $this->id.
+        *
+        * @param string $recentFN path of the newer history file
+        * @throws RuntimeException if the file cannot be opened
+        */
+       public function getNewContent($recentFN){
+               $reader = new XMLReader();
+               if (!$reader->open($recentFN)) {
+                       throw new RuntimeException("Unable to open recent history file: ".$recentFN);
+               }
+               $foundfirst = false;
+
+               while ($reader->read()){
+                       if ($reader->nodeType != XMLReader::ELEMENT
+                               || $reader->localName != "revision") {
+                               continue;
+                       }
+                       //serialize the element once and reuse it for both the id lookup and the copy
+                       $revisionXml = $reader->readOuterXml();
+                       if (!$foundfirst) {
+                               $revision = new SimpleXMLElement($revisionXml);
+                               //keep only the id text, not the whole parsed revision
+                               $this->id = (string)$revision->id;
+                               $foundfirst = true;
+                       }
+                       $this->newContent .= $revisionXml."\n";
+               }
+               $reader->close();
+       }
+
+       /**
+        * Writes the merged history to $this->outputFile.
+        *
+        * The output is "<page>", the <title> element(s) of the past file, the
+        * past revisions that precede the overlap id, the content gathered by
+        * getNewContent(), and "</page>".
+        *
+        * @throws RuntimeException if an input or the output file cannot be opened
+        */
+       public function run(){
+               //open both inputs before touching the output, so that an input which
+               //cannot be opened does not cause the output file to be created or truncated
+               $this->getNewContent($this->recentFileName);
+
+               $reader = new XMLReader();
+               if (!$reader->open($this->pastFileName)) {
+                       throw new RuntimeException("Unable to open past history file: ".$this->pastFileName);
+               }
+
+               $newfile = fopen($this->outputFile, "w");
+               if ($newfile === false) {
+                       $reader->close();
+                       throw new RuntimeException("Unable to open output file: ".$this->outputFile);
+               }
+               fwrite($newfile, "<page>\n");
+
+               while ($reader->read()){
+                       if ($reader->nodeType != XMLReader::ELEMENT) {
+                               continue;
+                       }
+                       //copy article title info
+                       if ($reader->localName == "title") {
+                               fwrite($newfile, $reader->readOuterXml()."\n");
+                       }
+                       //write past content until overlap id
+                       if ($reader->localName == "revision") {
+                               $revisionXml = $reader->readOuterXml();
+                               $revision = new SimpleXMLElement($revisionXml);
+                               if (strcmp((string)$revision->id, (string)$this->id) != 0) {
+                                       fwrite($newfile, $revisionXml."\n");
+                               }
+                               else{
+                                       echo "Overlap at: ".$this->id;
+                                       break;
+                               }
+                       }
+               }
+               $reader->close();
+
+               //write new content
+               fwrite($newfile, $this->newContent);
+               fwrite($newfile, "</page>");
+               fclose($newfile);
+       }
+}
+?>
\ No newline at end of file


Property changes on: trunk/tools/analysis/StreamingXMLConcat.php
___________________________________________________________________
Added: svn:eol-style
   + native



_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to