Brion VIBBER has submitted this change and it was merged.

Change subject: [WIP] Support reading and writing the 0.10 XML schema
......................................................................


[WIP] Support reading and writing the 0.10 XML schema

* bump package version to 1.25
* work around stray <sha1> under <page> on Wikia exports

TODO:
* Look into "deleted" attribute handling: I don't see how this information
is read from dumps, and it is not written out either.
* Needs tests.

Change-Id: I8715e97da855eb7d9d87c88f95e157e2af1fe784
---
M README
M pom.xml
M src/org/mediawiki/dumper/Dumper.java
M src/org/mediawiki/importer/DumpWriter.java
M src/org/mediawiki/importer/LatestFilter.java
M src/org/mediawiki/importer/MultiWriter.java
A src/org/mediawiki/importer/Namespace.java
M src/org/mediawiki/importer/NamespaceSet.java
M src/org/mediawiki/importer/Page.java
M src/org/mediawiki/importer/PageFilter.java
M src/org/mediawiki/importer/Revision.java
M src/org/mediawiki/importer/RevisionListFilter.java
M src/org/mediawiki/importer/Siteinfo.java
M src/org/mediawiki/importer/SphinxWriter.java
M src/org/mediawiki/importer/SqlWriter.java
M src/org/mediawiki/importer/TimeStampFilter.java
A src/org/mediawiki/importer/Wikiinfo.java
M src/org/mediawiki/importer/XmlDumpReader.java
C src/org/mediawiki/importer/XmlDumpWriter0_10.java
R src/org/mediawiki/importer/XmlDumpWriter0_3.java
M tests/org/mediawiki/importer/TitleTest.java
21 files changed, 283 insertions(+), 92 deletions(-)

Approvals:
  Brion VIBBER: Verified; Looks good to me, approved



diff --git a/README b/README
index 73af93f..5788290 100644
--- a/README
+++ b/README
@@ -76,6 +76,9 @@
   --format=xml
       Output back to MediaWiki's XML export format; use this for
       filtering dumps for limited import. Output should be idempotent.
+  --format=xml:0.3
+      Output in legacy 0.3 XML format; use with tools that can't handle
+      anything newer.
   --format=mysql:1.4
       SQL statements formatted for bulk import in MediaWiki 1.4's schema.
       (MySQL output format.)
diff --git a/pom.xml b/pom.xml
index 39bec4d..30e77b3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -5,7 +5,7 @@
   <name>mwdumper</name>
   <groupId>org.wikimedia</groupId>
   <artifactId>mwdumper</artifactId>
-  <version>1.16</version>
+  <version>1.25</version>
   <packaging>jar</packaging>
   <url>http://www.mediawiki.org/wiki/MWDumper</url>
 
diff --git a/src/org/mediawiki/dumper/Dumper.java 
b/src/org/mediawiki/dumper/Dumper.java
index 44b747a..177f42a 100644
--- a/src/org/mediawiki/dumper/Dumper.java
+++ b/src/org/mediawiki/dumper/Dumper.java
@@ -89,7 +89,7 @@
                                        if (output != null) {
                                                // Finish constructing the 
previous output...
                                                if (sink == null)
-                                                       sink = new 
XmlDumpWriter(output.getFileStream());
+                                                       sink = new 
XmlDumpWriter0_10(output.getFileStream());
                                                writers.add(sink);
                                                sink = null;
                                        }
@@ -104,7 +104,7 @@
                                        if (sink == null) {
                                                if (output == null)
                                                        output = new 
OutputWrapper(Tools.openStandardOutput());
-                                               sink = new 
XmlDumpWriter(output.getFileStream());
+                                               sink = new 
XmlDumpWriter0_10(output.getFileStream());
                                        }
                                        sink = addFilter(sink, val, param);
                                } else if (opt.equals("progress")) {
@@ -131,7 +131,7 @@
                        output = new OutputWrapper(Tools.openStandardOutput());
                // Finish stacking the last output sink
                if (sink == null)
-                       sink = new XmlDumpWriter(output.getFileStream());
+                       sink = new XmlDumpWriter0_10(output.getFileStream());
                writers.add(sink);
                
                DumpWriter outputSink = (progressInterval > 0)
@@ -237,11 +237,17 @@
        }
 
        static DumpWriter openOutputSink(OutputWrapper output, String format, 
String param) throws IOException {
-               if (format.equals("xml"))
-                       return new XmlDumpWriter(output.getFileStream());
-               else if (format.equals("sphinx"))
+               if (format.equals("xml")) {
+                       if (param.equals("0.3")) {
+                               return new 
XmlDumpWriter0_3(output.getFileStream());
+                       } else if (param.length() == 0 || param.equals("0.10")) 
{
+                               return new 
XmlDumpWriter0_10(output.getFileStream());
+                       } else {
+                               throw new IllegalArgumentException("XML schema 
version not known: " + param);
+                       }
+               } else if (format.equals("sphinx")) {
                        return new SphinxWriter(output.getFileStream());
-               else if (format.equals("mysql") || format.equals("pgsql") || 
format.equals("sql")) {
+               } else if (format.equals("mysql") || format.equals("pgsql") || 
format.equals("sql")) {
                        SqlStream sqlStream = output.getSqlStream();
                        SqlWriter ret;
 
@@ -259,8 +265,9 @@
                                throw new IllegalArgumentException("SQL version 
not known: " + param);
 
                        return ret;
-               } else
+               } else {
                        throw new IllegalArgumentException("Output format not 
known: " + format);
+               }
        }
        
        // ----------------
diff --git a/src/org/mediawiki/importer/DumpWriter.java 
b/src/org/mediawiki/importer/DumpWriter.java
index b857ddc..eb21f02 100644
--- a/src/org/mediawiki/importer/DumpWriter.java
+++ b/src/org/mediawiki/importer/DumpWriter.java
@@ -30,7 +30,7 @@
 public interface DumpWriter {
        void close() throws IOException;
        
-       void writeStartWiki() throws IOException;
+       void writeStartWiki(Wikiinfo info) throws IOException;
        void writeEndWiki() throws IOException;
        
        void writeSiteinfo(Siteinfo info) throws IOException;
diff --git a/src/org/mediawiki/importer/LatestFilter.java 
b/src/org/mediawiki/importer/LatestFilter.java
index e0e051f..709f5bd 100644
--- a/src/org/mediawiki/importer/LatestFilter.java
+++ b/src/org/mediawiki/importer/LatestFilter.java
@@ -39,8 +39,8 @@
                sink.close();
        }
        
-       public void writeStartWiki() throws IOException {
-               sink.writeStartWiki();
+       public void writeStartWiki(Wikiinfo info) throws IOException {
+               sink.writeStartWiki(info);
        }
        
        public void writeEndWiki() throws IOException {
diff --git a/src/org/mediawiki/importer/MultiWriter.java 
b/src/org/mediawiki/importer/MultiWriter.java
index 955cc8e..305cfdf 100644
--- a/src/org/mediawiki/importer/MultiWriter.java
+++ b/src/org/mediawiki/importer/MultiWriter.java
@@ -42,10 +42,10 @@
                }
        }
        
-       public void writeStartWiki() throws IOException {
+       public void writeStartWiki(Wikiinfo info) throws IOException {
                for (int i = 0; i < sinks.size(); i++) {
                        DumpWriter sink = sinks.get(i);
-                       sink.writeStartWiki();
+                       sink.writeStartWiki(info);
                }
        }
        
diff --git a/src/org/mediawiki/importer/Namespace.java 
b/src/org/mediawiki/importer/Namespace.java
new file mode 100644
index 0000000..1c44735
--- /dev/null
+++ b/src/org/mediawiki/importer/Namespace.java
@@ -0,0 +1,37 @@
+/*
+ * MediaWiki import/export processing tools
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+package org.mediawiki.importer;
+
+public class Namespace {
+       int Index;
+       String Prefix;
+       String Case;
+
+       public Namespace(int index, String prefix, String nscase) {
+               Index = index;
+               Prefix = prefix;
+               Case = nscase;
+       }
+}
diff --git a/src/org/mediawiki/importer/NamespaceSet.java 
b/src/org/mediawiki/importer/NamespaceSet.java
index fcc588b..11d350f 100644
--- a/src/org/mediawiki/importer/NamespaceSet.java
+++ b/src/org/mediawiki/importer/NamespaceSet.java
@@ -31,21 +31,21 @@
 import java.util.Iterator;
 
 public class NamespaceSet {
-       Map<String, Integer> byname;
-       Map<Integer, String> bynumber;
+       Map<String, Namespace> byname;
+       Map<Integer, Namespace> bynumber;
        
        public NamespaceSet() {
-               byname = new HashMap<String, Integer>();
-               bynumber = new LinkedHashMap<Integer, String>();
+               byname = new HashMap<String, Namespace>();
+               bynumber = new LinkedHashMap<Integer, Namespace>();
        }
        
-       public void add(int index, String prefix) {
-               add(new Integer(index), prefix);
+       public void add(int index, String prefix, String nscase) {
+               add(new Integer(index), prefix, nscase);
        }
        
-       public void add(Integer index, String prefix) {
-               byname.put(prefix, index);
-               bynumber.put(index, prefix);
+       public void add(Integer index, String prefix, String nscase) {
+               byname.put(prefix, new Namespace(index, prefix, nscase));
+               bynumber.put(index, new Namespace(index, prefix, nscase));
        }
        
        public boolean hasPrefix(String prefix) {
@@ -57,11 +57,11 @@
        }
        
        public String getPrefix(Integer index) {
-               return bynumber.get(index);
+               return bynumber.get(index).Prefix;
        }
        
        public Integer getIndex(String prefix) {
-               return byname.get(prefix);
+               return byname.get(prefix).Index;
        }
        
        public String getColonPrefix(Integer index) {
@@ -71,7 +71,7 @@
                return prefix;
        }
        
-       public Iterator<Map.Entry<Integer, String>>  orderedEntries() {
+       public Iterator<Map.Entry<Integer, Namespace>>  orderedEntries() {
                return bynumber.entrySet().iterator();
        }
 }
diff --git a/src/org/mediawiki/importer/Page.java 
b/src/org/mediawiki/importer/Page.java
index e8a1cae..edc21c9 100644
--- a/src/org/mediawiki/importer/Page.java
+++ b/src/org/mediawiki/importer/Page.java
@@ -29,7 +29,9 @@
 
 public class Page {
        public Title Title;
+       public int Ns;
        public int Id;
+       public Title Redirect;
        public boolean isRedirect = false;
        public Hashtable<String,Object> DiscussionThreadingInfo;
        public String Restrictions;
diff --git a/src/org/mediawiki/importer/PageFilter.java 
b/src/org/mediawiki/importer/PageFilter.java
index f831f76..d1338b1 100644
--- a/src/org/mediawiki/importer/PageFilter.java
+++ b/src/org/mediawiki/importer/PageFilter.java
@@ -39,8 +39,8 @@
                sink.close();
        }
        
-       public void writeStartWiki() throws IOException {
-               sink.writeStartWiki();
+       public void writeStartWiki(Wikiinfo info) throws IOException {
+               sink.writeStartWiki(info);
        }
        
        public void writeEndWiki() throws IOException {
diff --git a/src/org/mediawiki/importer/Revision.java 
b/src/org/mediawiki/importer/Revision.java
index 7b59550..9d6f695 100644
--- a/src/org/mediawiki/importer/Revision.java
+++ b/src/org/mediawiki/importer/Revision.java
@@ -29,13 +29,19 @@
 
 public class Revision {
        public int Id;
+       public int Parentid;
        public Calendar Timestamp;
        public Contributor Contributor;
        public String Comment;
+       public String Model;
+       public String Format;
        public String Text;
+       public Integer Bytes;
+       public String Sha1;
        public boolean Minor;
        
        public Revision() {
+               Parentid = 0;
                Comment = "";
                Text = "";
                Minor = false;
diff --git a/src/org/mediawiki/importer/RevisionListFilter.java 
b/src/org/mediawiki/importer/RevisionListFilter.java
index c763b22..44f4f13 100644
--- a/src/org/mediawiki/importer/RevisionListFilter.java
+++ b/src/org/mediawiki/importer/RevisionListFilter.java
@@ -58,8 +58,8 @@
                sink.close();
        }
        
-       public void writeStartWiki() throws IOException {
-               sink.writeStartWiki();
+       public void writeStartWiki(Wikiinfo info) throws IOException {
+               sink.writeStartWiki(info);
        }
        
        public void writeEndWiki() throws IOException {
diff --git a/src/org/mediawiki/importer/Siteinfo.java 
b/src/org/mediawiki/importer/Siteinfo.java
index eec4579..27364c5 100644
--- a/src/org/mediawiki/importer/Siteinfo.java
+++ b/src/org/mediawiki/importer/Siteinfo.java
@@ -27,6 +27,7 @@
 
 public class Siteinfo {
        public String Sitename;
+       public String Dbname;
        public String Base;
        public String Generator;
        public String Case;
diff --git a/src/org/mediawiki/importer/SphinxWriter.java 
b/src/org/mediawiki/importer/SphinxWriter.java
index c0236a0..c5ffadb 100644
--- a/src/org/mediawiki/importer/SphinxWriter.java
+++ b/src/org/mediawiki/importer/SphinxWriter.java
@@ -47,7 +47,7 @@
                writer.close();
        }
        
-       public void writeStartWiki() throws IOException {
+       public void writeStartWiki(Wikiinfo info) throws IOException {
                writer.openXml();
                // No containing element to open
        }
diff --git a/src/org/mediawiki/importer/SqlWriter.java 
b/src/org/mediawiki/importer/SqlWriter.java
index 74c0b17..a155157 100644
--- a/src/org/mediawiki/importer/SqlWriter.java
+++ b/src/org/mediawiki/importer/SqlWriter.java
@@ -124,8 +124,9 @@
                stream.close();
        }
 
-       public void writeStartWiki() throws IOException {
+       public void writeStartWiki(Wikiinfo info) throws IOException {
                stream.writeComment("-- MediaWiki XML dump converted to SQL by 
mwdumper");
+               stream.writeComment("-- Lang: " + commentSafe(info.Lang));
                stream.writeStatement("BEGIN");
 
                String prologue = traits.getWikiPrologue();
@@ -146,14 +147,15 @@
        public void writeSiteinfo(Siteinfo info) throws IOException {
                stream.writeComment("");
                stream.writeComment("-- Site: " + commentSafe(info.Sitename));
+               stream.writeComment("-- DB name: " + commentSafe(info.Dbname));
                stream.writeComment("-- URL: " + commentSafe(info.Base));
                stream.writeComment("-- Generator: " + 
commentSafe(info.Generator));
                stream.writeComment("-- Case: " + commentSafe(info.Case));
                stream.writeComment("--");
                stream.writeComment("-- Namespaces:");
-               for (Iterator<Map.Entry<Integer, String>>  i = 
info.Namespaces.orderedEntries(); i.hasNext();) {
-                       Map.Entry<Integer, String> e = i.next();
-                       stream.writeComment("-- " + e.getKey() + ": " + 
e.getValue());
+               for (Iterator<Map.Entry<Integer, Namespace>>  i = 
info.Namespaces.orderedEntries(); i.hasNext();) {
+                       Map.Entry<Integer, Namespace> e = i.next();
+                       stream.writeComment("-- " + e.getKey() + ": " + 
e.getValue().Prefix);
                }
                stream.writeComment("");
        }
diff --git a/src/org/mediawiki/importer/TimeStampFilter.java 
b/src/org/mediawiki/importer/TimeStampFilter.java
index ab7ec0a..50c0c63 100644
--- a/src/org/mediawiki/importer/TimeStampFilter.java
+++ b/src/org/mediawiki/importer/TimeStampFilter.java
@@ -45,8 +45,8 @@
                sink.close();
        }
 
-       public void writeStartWiki() throws IOException {
-               sink.writeStartWiki();
+       public void writeStartWiki(Wikiinfo info) throws IOException {
+               sink.writeStartWiki(info);
        }
 
        public void writeEndWiki() throws IOException {
diff --git a/src/org/mediawiki/importer/Wikiinfo.java 
b/src/org/mediawiki/importer/Wikiinfo.java
new file mode 100644
index 0000000..ea4a94f
--- /dev/null
+++ b/src/org/mediawiki/importer/Wikiinfo.java
@@ -0,0 +1,29 @@
+/*
+ * MediaWiki import/export processing tools
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+package org.mediawiki.importer;
+
+public class Wikiinfo {
+       public String Lang;
+}
diff --git a/src/org/mediawiki/importer/XmlDumpReader.java 
b/src/org/mediawiki/importer/XmlDumpReader.java
index f91f9d1..8431b5e 100644
--- a/src/org/mediawiki/importer/XmlDumpReader.java
+++ b/src/org/mediawiki/importer/XmlDumpReader.java
@@ -50,12 +50,14 @@
        private boolean hasContent = false;
        private boolean deleted = false;
        
+       Wikiinfo wikiinfo;
        Siteinfo siteinfo;
        Page page;
        boolean pageSent;
        Contributor contrib;
        Revision rev;
        int nskey;
+       String nscase;
        
        boolean abortFlag;
        
@@ -116,6 +118,8 @@
                startElements.put("siteinfo","siteinfo");
                startElements.put("namespaces","namespaces");
                startElements.put("namespace","namespace");
+               startElements.put("redirect","redirect");
+               startElements.put("text","text");
 
                endElements.put("ThreadSubject","ThreadSubject");
                endElements.put("ThreadParent","ThreadParent");
@@ -129,18 +133,23 @@
                endElements.put("base","base");
                endElements.put("case","case");
                endElements.put("comment","comment");
+               endElements.put("dbname","dbname");
                endElements.put("contributor","contributor");
+               endElements.put("format","format");
                endElements.put("generator","generator");
                endElements.put("id","id");
                endElements.put("ip","ip");
                endElements.put("mediawiki", "mediawiki");
                endElements.put("minor","minor");
+               endElements.put("model","model");
                endElements.put("namespaces","namespaces");
                endElements.put("namespace","namespace");
+               endElements.put("ns","ns");
                endElements.put("page","page");
-               endElements.put("redirect","redirect");
+               endElements.put("parentid","parentid");
                endElements.put("restrictions","restrictions");
                endElements.put("revision","revision");
+               endElements.put("sha1","sha1");
                endElements.put("siteinfo","siteinfo");
                endElements.put("sitename","sitename");
                endElements.put("text","text");
@@ -161,6 +170,7 @@
 
                // check for deleted="deleted", and set deleted flag for the 
current element. 
                String d = attributes.getValue("deleted");
+               // FIXME: verify whether the deleted attribute is preserved in 
our objects.
                deleted = (d!=null && d.equals("deleted")); 
                
                try {
@@ -171,8 +181,10 @@
                        if (qName == "revision") openRevision();
                        else if (qName == "contributor") openContributor();
                        else if (qName == "page") openPage();
+                       else if (qName == "redirect") openRedirect(attributes);
+                       else if (qName == "text") openText(attributes);
                        // rare tags:
-                       else if (qName == "mediawiki") openMediaWiki();
+                       else if (qName == "mediawiki") 
openMediaWiki(attributes);
                        else if (qName == "siteinfo") openSiteinfo();
                        else if (qName == "namespaces") openNamespaces();
                        else if (qName == "namespace") 
openNamespace(attributes);
@@ -203,6 +215,7 @@
                        // frequent tags:
                        if (qName == "id") readId();
                        else if (qName == "revision") closeRevision();
+                       else if (qName == "parentid") readParentid();
                        else if (qName == "timestamp") readTimestamp();
                        else if (qName == "text") readText();
                        else if (qName == "contributor") closeContributor();
@@ -210,15 +223,19 @@
                        else if (qName == "ip") readIp();
                        else if (qName == "comment") readComment();
                        else if (qName == "minor") readMinor();
+                       else if (qName == "model") readModel();
+                       else if (qName == "format") readFormat();
+                       else if (qName == "sha1") readSha1();
                        else if (qName == "page") closePage();
                        else if (qName == "title") readTitle();
+                       else if (qName == "ns") readNs();
                        else if (qName == "restrictions") readRestrictions();
-                       else if (qName == "redirect") readRedirect();
                        // rare tags:
                        else if (qName.startsWith("Thread")) 
threadAttribute(qName);
                        else if (qName == "mediawiki") closeMediaWiki();
                        else if (qName == "siteinfo") closeSiteinfo();
                        else if (qName == "sitename") readSitename();
+                       else if (qName == "dbname") readDbname();
                        else if (qName == "base") readBase();
                        else if (qName == "generator") readGenerator();
                        else if (qName == "case") readCase();
@@ -239,9 +256,16 @@
                        page.DiscussionThreadingInfo.put(attrib, 
bufferContents());
        }
        
-       void openMediaWiki() throws IOException {
+       void openMediaWiki(Attributes attributes) throws IOException {
                siteinfo = null;
-               writer.writeStartWiki();
+
+               wikiinfo = new Wikiinfo();
+               wikiinfo.Lang = attributes.getValue("xml:lang");
+               if (wikiinfo.Lang.length() == 0) {
+                       wikiinfo.Lang = "en";
+               }
+
+               writer.writeStartWiki(wikiinfo);
        }
        
        void closeMediaWiki() throws IOException {
@@ -272,6 +296,10 @@
                siteinfo.Sitename = bufferContents();
        }
        
+       void readDbname() {
+               siteinfo.Dbname = bufferContents();
+       }
+
        void readBase() {
                siteinfo.Base = bufferContents();
        }
@@ -290,10 +318,11 @@
        
        void openNamespace(Attributes attribs) {
                nskey = Integer.parseInt(attribs.getValue("key"));
+               nscase = attribs.getValue("case");
        }
        
        void closeNamespace() {
-               siteinfo.Namespaces.add(nskey, bufferContents());
+               siteinfo.Namespaces.add(nskey, bufferContents(), nscase);
        }
 
        void closeNamespaces() {
@@ -316,7 +345,11 @@
        void readTitle() {
                page.Title = new Title(bufferContents(), siteinfo.Namespaces);
        }
-       
+
+       void readNs() {
+               page.Ns = Integer.parseInt(bufferContents());
+       }
+
        void readId() {
                int id = Integer.parseInt(bufferContents());
                if (contrib != null) 
@@ -328,11 +361,15 @@
                else
                        throw new IllegalArgumentException("Unexpected <id> 
outside a <page>, <revision>, or <contributor>");
        }
-       
-       void readRedirect() {
+
+       void openRedirect(Attributes attributes) {
+               String title = attributes.getValue("title");
+               if (title != null) {
+                       page.Redirect = new Title(title, siteinfo.Namespaces);
+               }
                page.isRedirect = true;
        }
-       
+
        void readRestrictions() {
                page.Restrictions = bufferContents();
        }
@@ -353,6 +390,10 @@
                rev = null;
        }
 
+       void readParentid() {
+               rev.Parentid = Integer.parseInt(bufferContents());
+       }
+
        void readTimestamp() {
                rev.Timestamp = parseUTCTimestamp(bufferContents());
        }
@@ -366,10 +407,38 @@
                rev.Minor = true;
        }
 
+       void readModel() {
+               rev.Model = bufferContents();
+       }
+
+       void readFormat() {
+               rev.Format = bufferContents();
+       }
+
+       void openText(Attributes attributes) {
+               String bytes = attributes.getValue("bytes");
+               if (bytes != null) {
+                       rev.Bytes = new Integer(bytes);
+               }
+       }
+
        void readText() {
                rev.Text = bufferContentsOrNull();
                if (rev.Text==null && !deleted) rev.Text = ""; //NOTE: null 
means deleted/supressed
        }
+
+       void readSha1() {
+               String sha1 = bufferContents();
+
+               if (rev != null) {
+                       rev.Sha1 = sha1;
+               } else if (page != null) {
+                       // Wikia running MW 1.19 emits a <sha1> directly under 
<page>;
+                       // ignore it for now as it's unclear how to map it.
+               } else {
+                       throw new IllegalArgumentException("Unexpected <id> 
outside a <revision>");
+               }
+       }
        
        // -----------
        void openContributor() {
diff --git a/src/org/mediawiki/importer/XmlDumpWriter.java 
b/src/org/mediawiki/importer/XmlDumpWriter0_10.java
similarity index 70%
copy from src/org/mediawiki/importer/XmlDumpWriter.java
copy to src/org/mediawiki/importer/XmlDumpWriter0_10.java
index 872dab7..65010da 100644
--- a/src/org/mediawiki/importer/XmlDumpWriter.java
+++ b/src/org/mediawiki/importer/XmlDumpWriter0_10.java
@@ -29,24 +29,25 @@
 import java.io.OutputStream;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
+import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.TimeZone;
 
-public class XmlDumpWriter implements DumpWriter {
+public class XmlDumpWriter0_10 implements DumpWriter {
        protected OutputStream stream;
        protected XmlWriter writer;
        
-       protected static final String version = "0.3";
+       protected static final String version = "0.10";
        protected static final String ns = 
"http://www.mediawiki.org/xml/export-"; + version + "/";
        protected static final String schema = 
"http://www.mediawiki.org/xml/export-"; + version + ".xsd";
        protected static final DateFormat dateFormat = new 
SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ss'Z'");
        static {
                dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
        }
-       
-       public XmlDumpWriter(OutputStream output) throws IOException {
+
+       public XmlDumpWriter0_10(OutputStream output) throws IOException {
                stream = output;
                writer = new XmlWriter(stream);
        }
@@ -55,15 +56,14 @@
                writer.close();
        }
        
-       public void writeStartWiki() throws IOException {
+       public void writeStartWiki(Wikiinfo info) throws IOException {
                writer.openXml();
                writer.openElement("mediawiki", new String[][] {
                        {"xmlns", ns},
                        {"xmlns:xsi", 
"http://www.w3.org/2001/XMLSchema-instance"},
                        {"xsi:schemaLocation", ns + " " + schema},
                        {"version", version},
-                       {"xml:lang", "en"}});
-               // TODO: store and keep the xml:lang
+                       {"xml:lang", info.Lang}});
        }
        
        public void writeEndWiki() throws IOException {
@@ -75,15 +75,25 @@
                XmlWriter writer = this.writer;
                writer.openElement("siteinfo");
                writer.textElement("sitename", info.Sitename);
+               if (info.Dbname != null) {
+                       writer.textElement("dbname", info.Dbname);
+               }
                writer.textElement("base", info.Base);
                writer.textElement("generator", info.Generator);
                writer.textElement("case", info.Case);
                
                writer.openElement("namespaces");
-               for (Iterator<Map.Entry<Integer, String>> i = 
info.Namespaces.orderedEntries(); i.hasNext();) {
-                       Map.Entry<Integer, String> e = i.next();
-                       writer.textElement("namespace", 
e.getValue().toString(), new String[][] {
-                                       {"key", e.getKey().toString()}});
+               for (Iterator<Map.Entry<Integer, Namespace>> i = 
info.Namespaces.orderedEntries(); i.hasNext();) {
+                       Map.Entry<Integer, Namespace> e = i.next();
+
+                       ArrayList<String[]> textAttribs = new 
ArrayList<String[]>();
+                       textAttribs.add(new String[] {"key", 
e.getKey().toString()});
+                       if (e.getValue().Case != null) {
+                               textAttribs.add(new String[] {"case", 
e.getValue().Case});
+                       }
+                       writer.textElement("namespace", e.getValue().Prefix,
+                               textAttribs.toArray(new String[][] {})
+                       );
                }
                writer.closeElement();
                
@@ -94,8 +104,15 @@
                XmlWriter writer = this.writer;
                writer.openElement("page");
                writer.textElement("title", page.Title.toString());
-               if (page.Id != 0)
+               writer.textElement("ns", Integer.toString(page.Ns));
+               if (page.Id != 0) {
                        writer.textElement("id", Integer.toString(page.Id));
+               }
+               if (page.Redirect != null) {
+                       writer.emptyElement("redirect", new String[][] {
+                               {"title", page.Redirect.toString()}
+                       });
+               }
                if (page.Restrictions != null && page.Restrictions.length() != 0)
                        writer.textElement("restrictions", page.Restrictions);
        }
@@ -107,8 +124,12 @@
        public void writeRevision(Revision rev) throws IOException {
                XmlWriter writer = this.writer;
                writer.openElement("revision");
-               if (rev.Id != 0)
+               if (rev.Id != 0) {
                        writer.textElement("id", Integer.toString(rev.Id));
+               }
+               if (rev.Parentid != 0) {
+                       writer.textElement("parentid", Integer.toString(rev.Parentid));
+               }
                
                writer.textElement("timestamp", formatTimestamp(rev.Timestamp));
                
@@ -120,15 +141,30 @@
                
                if (rev.Comment == null) {
                        writer.emptyElement("comment", deletedAttrib);
-               } 
+               }
                else if (rev.Comment.length() != 0) {
                        writer.textElement("comment", rev.Comment);
                }
-               
-               writer.textElement("text", rev.Text, 
-                               rev.Text==null ? new String[][] {{"xml:space", 
"preserve"}, {"deleted", "deleted"}} 
-                                                                               
                : new String[][] {{"xml:space", "preserve"}}
-               );
+
+               if (rev.Model != null) {
+                       writer.textElement("model", rev.Model);
+               }
+               if (rev.Format != null) {
+                       writer.textElement("format", rev.Format);
+               }
+
+               ArrayList<String[]> textAttribs = new ArrayList<String[]>();
+               textAttribs.add(new String[] {"xml:space", "preserve"});
+               if (rev.Text == null) {
+                       textAttribs.add(new String[] {"deleted", "deleted"});
+               } else if (rev.Bytes != null) {
+                       textAttribs.add(new String[] {"bytes", rev.Bytes.toString()});
+               }
+               writer.textElement("text", rev.Text, textAttribs.toArray(new String[][] {}));
+
+               if (rev.Sha1 != null) {
+                       writer.textElement("sha1", rev.Sha1);
+               }
                
                writer.closeElement();
        }
diff --git a/src/org/mediawiki/importer/XmlDumpWriter.java b/src/org/mediawiki/importer/XmlDumpWriter0_3.java
similarity index 90%
rename from src/org/mediawiki/importer/XmlDumpWriter.java
rename to src/org/mediawiki/importer/XmlDumpWriter0_3.java
index 872dab7..40cdbb5 100644
--- a/src/org/mediawiki/importer/XmlDumpWriter.java
+++ b/src/org/mediawiki/importer/XmlDumpWriter0_3.java
@@ -34,7 +34,7 @@
 import java.util.Map;
 import java.util.TimeZone;
 
-public class XmlDumpWriter implements DumpWriter {
+public class XmlDumpWriter0_3 implements DumpWriter {
        protected OutputStream stream;
        protected XmlWriter writer;
        
@@ -46,7 +46,7 @@
                dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
        }
        
-       public XmlDumpWriter(OutputStream output) throws IOException {
+       public XmlDumpWriter0_3(OutputStream output) throws IOException {
                stream = output;
                writer = new XmlWriter(stream);
        }
@@ -55,15 +55,14 @@
                writer.close();
        }
        
-       public void writeStartWiki() throws IOException {
+       public void writeStartWiki(Wikiinfo info) throws IOException {
                writer.openXml();
                writer.openElement("mediawiki", new String[][] {
                        {"xmlns", ns},
                        {"xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"},
                        {"xsi:schemaLocation", ns + " " + schema},
                        {"version", version},
-                       {"xml:lang", "en"}});
-               // TODO: store and keep the xml:lang
+                       {"xml:lang", info.Lang}});
        }
        
        public void writeEndWiki() throws IOException {
@@ -80,9 +79,9 @@
                writer.textElement("case", info.Case);
                
                writer.openElement("namespaces");
-               for (Iterator<Map.Entry<Integer, String>> i = info.Namespaces.orderedEntries(); i.hasNext();) {
-                       Map.Entry<Integer, String> e = i.next();
-                       writer.textElement("namespace", e.getValue().toString(), new String[][] {
+               for (Iterator<Map.Entry<Integer, Namespace>> i = info.Namespaces.orderedEntries(); i.hasNext();) {
+                       Map.Entry<Integer, Namespace> e = i.next();
+                       writer.textElement("namespace", e.getValue().Prefix, new String[][] {
                                        {"key", e.getKey().toString()}});
                }
                writer.closeElement();
diff --git a/tests/org/mediawiki/importer/TitleTest.java b/tests/org/mediawiki/importer/TitleTest.java
index e9d723b..2fbc49d 100644
--- a/tests/org/mediawiki/importer/TitleTest.java
+++ b/tests/org/mediawiki/importer/TitleTest.java
@@ -37,24 +37,24 @@
        protected void setUp() throws Exception {
                super.setUp();
                namespaces = new NamespaceSet();
-               namespaces.add(-2, "Media");
-               namespaces.add(-1, "Special");
-               namespaces.add(0, "");
-               namespaces.add(1, "Talk");
-               namespaces.add(2, "User");
-               namespaces.add(3, "User talk");
-               namespaces.add(4, "Project");
-               namespaces.add(5, "Project talk");
-               namespaces.add(6, "Image");
-               namespaces.add(7, "Image talk");
-               namespaces.add(8, "MediaWiki");
-               namespaces.add(9, "MediaWiki talk");
-               namespaces.add(10, "Template");
-               namespaces.add(11, "Template talk");
-               namespaces.add(12, "Help");
-               namespaces.add(13, "Help talk");
-               namespaces.add(14, "Category");
-               namespaces.add(15, "Category talk");
+               namespaces.add(-2, "Media", "first-letter");
+               namespaces.add(-1, "Special", "first-letter");
+               namespaces.add(0, "", "first-letter");
+               namespaces.add(1, "Talk", "first-letter");
+               namespaces.add(2, "User", "first-letter");
+               namespaces.add(3, "User talk", "first-letter");
+               namespaces.add(4, "Project", "first-letter");
+               namespaces.add(5, "Project talk", "first-letter");
+               namespaces.add(6, "Image", "first-letter");
+               namespaces.add(7, "Image talk", "first-letter");
+               namespaces.add(8, "MediaWiki", "first-letter");
+               namespaces.add(9, "MediaWiki talk", "first-letter");
+               namespaces.add(10, "Template", "first-letter");
+               namespaces.add(11, "Template talk", "first-letter");
+               namespaces.add(12, "Help", "first-letter");
+               namespaces.add(13, "Help talk", "first-letter");
+               namespaces.add(14, "Category", "first-letter");
+               namespaces.add(15, "Category talk", "first-letter");
        }
 
        protected void tearDown() throws Exception {

-- 
To view, visit https://gerrit.wikimedia.org/r/192174
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I8715e97da855eb7d9d87c88f95e157e2af1fe784
Gerrit-PatchSet: 12
Gerrit-Project: mediawiki/tools/mwdumper
Gerrit-Branch: master
Gerrit-Owner: Awight <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>
Gerrit-Reviewer: Awight <[email protected]>
Gerrit-Reviewer: Brion VIBBER <[email protected]>
Gerrit-Reviewer: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: Diederik <[email protected]>
Gerrit-Reviewer: Martineznovo <[email protected]>
Gerrit-Reviewer: Oren <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to