Awight has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/192739

Change subject: Use 64-bit integers to store revision ID
......................................................................

Use 64-bit integers to store revision ID

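Enwiki is already at max(rev_id) = 1.2 x 2^29, which is within a factor
of four of the largest value a signed 32-bit int can hold (2^31 - 1).
Use 64-bit long/Long everywhere when handling revision IDs.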

Change-Id: Ibfa4564d3b5a00a61546600977f2074085012c49
---
M src/org/mediawiki/dumper/ProgressFilter.java
M src/org/mediawiki/importer/Revision.java
M src/org/mediawiki/importer/RevisionListFilter.java
M src/org/mediawiki/importer/SqlWriter14.java
M src/org/mediawiki/importer/SqlWriter15.java
M src/org/mediawiki/importer/SqlWriter1_25.java
M src/org/mediawiki/importer/XmlDumpReader.java
M src/org/mediawiki/importer/XmlDumpWriter0_10.java
M src/org/mediawiki/importer/XmlDumpWriter0_3.java
9 files changed, 28 insertions(+), 29 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/tools/mwdumper refs/changes/39/192739/1

diff --git a/src/org/mediawiki/dumper/ProgressFilter.java 
b/src/org/mediawiki/dumper/ProgressFilter.java
index 01ce837..9d890f4 100644
--- a/src/org/mediawiki/dumper/ProgressFilter.java
+++ b/src/org/mediawiki/dumper/ProgressFilter.java
@@ -35,7 +35,7 @@
 
 public class ProgressFilter extends PageFilter {
        int pages = 0;
-       int revisions = 0;
+       long revisions = 0;
        int interval = 1000;
        MessageFormat format = new MessageFormat("{0} pages ({1}/sec), {2} revs 
({3}/sec)");
        long start = System.currentTimeMillis();
@@ -79,7 +79,7 @@
                sendOutput(format.format(new Object[] {
                        new Integer(pages),
                        rate(delta, pages),
-                       new Integer(revisions),
+                       new Long(revisions),
                        rate(delta, revisions)}));
        }
        
@@ -87,7 +87,7 @@
                System.err.println(text);               
        }
 
-       private static Object rate(long delta, int count) {
+       private static Object rate(long delta, long count) {
                return (delta > 0.001)
                        ? (Object)new Double(1000.0 * (double)count / 
(double)delta)
                        : (Object)"-";
diff --git a/src/org/mediawiki/importer/Revision.java 
b/src/org/mediawiki/importer/Revision.java
index 9d6f695..9995194 100644
--- a/src/org/mediawiki/importer/Revision.java
+++ b/src/org/mediawiki/importer/Revision.java
@@ -28,8 +28,8 @@
 import java.util.Calendar;
 
 public class Revision {
-       public int Id;
-       public int Parentid;
+       public long Id;
+       public long Parentid;
        public Calendar Timestamp;
        public Contributor Contributor;
        public String Comment;
diff --git a/src/org/mediawiki/importer/RevisionListFilter.java 
b/src/org/mediawiki/importer/RevisionListFilter.java
index 44f4f13..96aab6d 100644
--- a/src/org/mediawiki/importer/RevisionListFilter.java
+++ b/src/org/mediawiki/importer/RevisionListFilter.java
@@ -25,7 +25,6 @@
 
 package org.mediawiki.importer;
 
-import java.lang.Integer;
 import java.io.BufferedReader;
 import java.io.FileInputStream;
 import java.io.IOException;
@@ -34,20 +33,20 @@
 
 public class RevisionListFilter implements DumpWriter {
        DumpWriter sink;
-       protected TreeSet<Integer> revIds;
+       protected TreeSet<Long> revIds;
        protected Page currentPage;
        protected boolean pageWritten;
        
        public RevisionListFilter(DumpWriter sink, String sourceFileName) 
throws IOException {
                this.sink = sink;
-               revIds = new TreeSet<Integer>();
+               revIds = new TreeSet<Long>();
                BufferedReader input = new BufferedReader(new InputStreamReader(
                        new FileInputStream(sourceFileName), "utf-8"));
                String line = input.readLine();
                while (line != null) {
                        line = line.trim();
                        if (line.length() > 0 && !line.startsWith("#")) {
-                               revIds.add(new Integer(line));
+                               revIds.add(new Long(line));
                        }
                        line = input.readLine();
                }
@@ -82,7 +81,7 @@
        }
        
        public void writeRevision(Revision revision) throws IOException {
-               if (revIds.contains(new Integer(revision.Id))) {
+               if (revIds.contains(new Long(revision.Id))) {
                        if (!pageWritten) {
                                sink.writeStartPage(currentPage);
                                pageWritten = true;
diff --git a/src/org/mediawiki/importer/SqlWriter14.java 
b/src/org/mediawiki/importer/SqlWriter14.java
index 2585c4d..494ce5e 100644
--- a/src/org/mediawiki/importer/SqlWriter14.java
+++ b/src/org/mediawiki/importer/SqlWriter14.java
@@ -60,7 +60,7 @@
        
        private void writeOldRevision(Page page, Revision revision) throws 
IOException {
                bufferInsertRow("old", new Object[][] {
-                               {"old_id", new Integer(revision.Id)},
+                               {"old_id", new Long(revision.Id)},
                                {"old_namespace", page.Title.Namespace},
                                {"old_title", titleFormat(page.Title.Text)},
                                {"old_text", revision.Text == null ? "" : 
revision.Text},
diff --git a/src/org/mediawiki/importer/SqlWriter15.java 
b/src/org/mediawiki/importer/SqlWriter15.java
index f57473d..50873e3 100644
--- a/src/org/mediawiki/importer/SqlWriter15.java
+++ b/src/org/mediawiki/importer/SqlWriter15.java
@@ -66,7 +66,7 @@
        
        public void writeRevision(Revision revision) throws IOException {
                bufferInsertRow(traits.getTextTable(), new Object[][] {
-                               {"old_id", new Integer(revision.Id)},
+                               {"old_id", new Long(revision.Id)},
                                {"old_text", revision.Text == null ? "" : 
revision.Text},
                                {"old_flags", "utf-8"}});
                
@@ -76,9 +76,9 @@
                if (revision.Text==null) rev_deleted |= DELETED_TEXT;
 
                bufferInsertRow("revision", new Object[][] {
-                               {"rev_id", new Integer(revision.Id)},
+                               {"rev_id", new Long(revision.Id)},
                                {"rev_page", new Integer(currentPage.Id)},
-                               {"rev_text_id", new Integer(revision.Id)},
+                               {"rev_text_id", new Long(revision.Id)},
                                {"rev_comment", 
commentFormat(revision.Comment)},
                                {"rev_user", revision.Contributor.Username == 
null ? ZERO :  new Integer(revision.Contributor.Id)},
                                {"rev_user_text", revision.Contributor.Username 
== null ? "" : revision.Contributor.Username},
@@ -100,7 +100,7 @@
                                {"page_is_new", ZERO},
                                {"page_random", traits.getRandom()},
                                {"page_touched", traits.getCurrentTime()},
-                               {"page_latest", new Integer(revision.Id)},
+                               {"page_latest", new Long(revision.Id)},
                                {"page_len", revision.Bytes}});
                checkpoint();
        }
diff --git a/src/org/mediawiki/importer/SqlWriter1_25.java 
b/src/org/mediawiki/importer/SqlWriter1_25.java
index a24696c..54d09e8 100644
--- a/src/org/mediawiki/importer/SqlWriter1_25.java
+++ b/src/org/mediawiki/importer/SqlWriter1_25.java
@@ -32,11 +32,11 @@
        public SqlWriter1_25(SqlWriter.Traits tr, SqlStream output) {
                super(tr, output);
        }
-       
+
        public SqlWriter1_25(SqlWriter.Traits tr, SqlStream output, String 
prefix) {
                super(tr, output, prefix);
        }
-       
+
        protected void updatePage(Page page, Revision revision) throws 
IOException {
                bufferInsertRow("page", new Object[][] {
                                {"page_id", new Integer(page.Id)},
@@ -47,7 +47,7 @@
                                {"page_is_new", ZERO},
                                {"page_random", traits.getRandom()},
                                {"page_touched", traits.getCurrentTime()},
-                               {"page_latest", new Integer(revision.Id)},
+                               {"page_latest", new Long(revision.Id)},
                                {"page_len", revision.Bytes},
                                {"page_content_model", revision.Model},
                });
@@ -64,32 +64,32 @@
 
        public void writeRevision(Revision revision) throws IOException {
                bufferInsertRow(traits.getTextTable(), new Object[][] {
-                               {"old_id", new Integer(revision.Id)},
+                               {"old_id", new Long(revision.Id)},
                                {"old_text", revision.Text == null ? "" : 
revision.Text},
                                {"old_flags", "utf-8"}});
-               
+
                int rev_deleted = 0;
                if (revision.Contributor.Username==null) rev_deleted |= 
DELETED_USER;
                if (revision.Comment==null) rev_deleted |= DELETED_COMMENT;
                if (revision.Text==null) rev_deleted |= DELETED_TEXT;
 
                bufferInsertRow("revision", new Object[][] {
-                               {"rev_id", new Integer(revision.Id)},
+                               {"rev_id", new Long(revision.Id)},
                                {"rev_page", new Integer(currentPage.Id)},
-                               {"rev_text_id", new Integer(revision.Id)},
+                               {"rev_text_id", new Long(revision.Id)},
                                {"rev_comment", 
commentFormat(revision.Comment)},
                                {"rev_user", revision.Contributor.Username == 
null ? ZERO :  new Integer(revision.Contributor.Id)},
                                {"rev_user_text", revision.Contributor.Username 
== null ? "" : revision.Contributor.Username},
                                {"rev_timestamp", 
timestampFormat(revision.Timestamp)},
                                {"rev_minor_edit", revision.Minor ? ONE : ZERO},
-                               {"rev_parent_id", revision.Parentid == 0 ? null 
: new Integer(revision.Parentid)},
+                               {"rev_parent_id", revision.Parentid == 0 ? null 
: new Long(revision.Parentid)},
                                {"rev_sha1", revision.Sha1},
                                {"rev_content_model", revision.Model},
                                {"rev_content_format", revision.Format},
                                {"rev_deleted", rev_deleted==0 ? ZERO : new 
Integer(rev_deleted)},
                                {"rev_len", revision.Bytes},
                });
-               
+
                lastRevision = revision;
        }
 }
diff --git a/src/org/mediawiki/importer/XmlDumpReader.java 
b/src/org/mediawiki/importer/XmlDumpReader.java
index d5df43e..506078a 100644
--- a/src/org/mediawiki/importer/XmlDumpReader.java
+++ b/src/org/mediawiki/importer/XmlDumpReader.java
@@ -351,7 +351,7 @@
        }
 
        void readId() {
-               int id = Integer.parseInt(bufferContents());
+               long id = Long.parseLong(bufferContents());
                if (contrib != null) 
                        contrib.Id = id;
                else if (rev != null)
@@ -404,7 +404,7 @@
        }
 
        void readParentid() {
-               rev.Parentid = Integer.parseInt(bufferContents());
+               rev.Parentid = Long.parseLong(bufferContents());
        }
 
        void readTimestamp() {
diff --git a/src/org/mediawiki/importer/XmlDumpWriter0_10.java 
b/src/org/mediawiki/importer/XmlDumpWriter0_10.java
index afca381..69bd5e9 100644
--- a/src/org/mediawiki/importer/XmlDumpWriter0_10.java
+++ b/src/org/mediawiki/importer/XmlDumpWriter0_10.java
@@ -122,9 +122,9 @@
                XmlWriter writer = this.writer;
                writer.openElement("revision");
                // FIXME: We're writing "0" here if the input dump didn't 
include IDs.
-               writer.textElement("id", Integer.toString(rev.Id));
+               writer.textElement("id", Long.toString(rev.Id));
                if (rev.Parentid != 0) {
-                       writer.textElement("parentid", 
Integer.toString(rev.Parentid));
+                       writer.textElement("parentid", 
Long.toString(rev.Parentid));
                }
                
                writer.textElement("timestamp", formatTimestamp(rev.Timestamp));
diff --git a/src/org/mediawiki/importer/XmlDumpWriter0_3.java 
b/src/org/mediawiki/importer/XmlDumpWriter0_3.java
index 40cdbb5..1d57df5 100644
--- a/src/org/mediawiki/importer/XmlDumpWriter0_3.java
+++ b/src/org/mediawiki/importer/XmlDumpWriter0_3.java
@@ -107,7 +107,7 @@
                XmlWriter writer = this.writer;
                writer.openElement("revision");
                if (rev.Id != 0)
-                       writer.textElement("id", Integer.toString(rev.Id));
+                       writer.textElement("id", Long.toString(rev.Id));
                
                writer.textElement("timestamp", formatTimestamp(rev.Timestamp));
                

-- 
To view, visit https://gerrit.wikimedia.org/r/192739
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ibfa4564d3b5a00a61546600977f2074085012c49
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/tools/mwdumper
Gerrit-Branch: master
Gerrit-Owner: Awight <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to