Awight has uploaded a new change for review.
https://gerrit.wikimedia.org/r/192739
Change subject: Use 64-bit integers to store revision ID
......................................................................
Use 64-bit integers to store revision ID
Enwiki is already at max(rev_id) = 1.2 x 2^29, i.e. about 30% of the
signed 32-bit limit (2^31 - 1). Use long/Long everywhere when handling
revision IDs so they cannot overflow.
Change-Id: Ibfa4564d3b5a00a61546600977f2074085012c49
---
M src/org/mediawiki/dumper/ProgressFilter.java
M src/org/mediawiki/importer/Revision.java
M src/org/mediawiki/importer/RevisionListFilter.java
M src/org/mediawiki/importer/SqlWriter14.java
M src/org/mediawiki/importer/SqlWriter15.java
M src/org/mediawiki/importer/SqlWriter1_25.java
M src/org/mediawiki/importer/XmlDumpReader.java
M src/org/mediawiki/importer/XmlDumpWriter0_10.java
M src/org/mediawiki/importer/XmlDumpWriter0_3.java
9 files changed, 28 insertions(+), 29 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/tools/mwdumper
refs/changes/39/192739/1
diff --git a/src/org/mediawiki/dumper/ProgressFilter.java
b/src/org/mediawiki/dumper/ProgressFilter.java
index 01ce837..9d890f4 100644
--- a/src/org/mediawiki/dumper/ProgressFilter.java
+++ b/src/org/mediawiki/dumper/ProgressFilter.java
@@ -35,7 +35,7 @@
public class ProgressFilter extends PageFilter {
int pages = 0;
- int revisions = 0;
+ long revisions = 0;
int interval = 1000;
MessageFormat format = new MessageFormat("{0} pages ({1}/sec), {2} revs
({3}/sec)");
long start = System.currentTimeMillis();
@@ -79,7 +79,7 @@
sendOutput(format.format(new Object[] {
new Integer(pages),
rate(delta, pages),
- new Integer(revisions),
+ new Long(revisions),
rate(delta, revisions)}));
}
@@ -87,7 +87,7 @@
System.err.println(text);
}
- private static Object rate(long delta, int count) {
+ private static Object rate(long delta, long count) {
return (delta > 0.001)
? (Object)new Double(1000.0 * (double)count /
(double)delta)
: (Object)"-";
diff --git a/src/org/mediawiki/importer/Revision.java
b/src/org/mediawiki/importer/Revision.java
index 9d6f695..9995194 100644
--- a/src/org/mediawiki/importer/Revision.java
+++ b/src/org/mediawiki/importer/Revision.java
@@ -28,8 +28,8 @@
import java.util.Calendar;
public class Revision {
- public int Id;
- public int Parentid;
+ public long Id;
+ public long Parentid;
public Calendar Timestamp;
public Contributor Contributor;
public String Comment;
diff --git a/src/org/mediawiki/importer/RevisionListFilter.java
b/src/org/mediawiki/importer/RevisionListFilter.java
index 44f4f13..96aab6d 100644
--- a/src/org/mediawiki/importer/RevisionListFilter.java
+++ b/src/org/mediawiki/importer/RevisionListFilter.java
@@ -25,7 +25,6 @@
package org.mediawiki.importer;
-import java.lang.Integer;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
@@ -34,20 +33,20 @@
public class RevisionListFilter implements DumpWriter {
DumpWriter sink;
- protected TreeSet<Integer> revIds;
+ protected TreeSet<Long> revIds;
protected Page currentPage;
protected boolean pageWritten;
public RevisionListFilter(DumpWriter sink, String sourceFileName)
throws IOException {
this.sink = sink;
- revIds = new TreeSet<Integer>();
+ revIds = new TreeSet<Long>();
BufferedReader input = new BufferedReader(new InputStreamReader(
new FileInputStream(sourceFileName), "utf-8"));
String line = input.readLine();
while (line != null) {
line = line.trim();
if (line.length() > 0 && !line.startsWith("#")) {
- revIds.add(new Integer(line));
+ revIds.add(new Long(line));
}
line = input.readLine();
}
@@ -82,7 +81,7 @@
}
public void writeRevision(Revision revision) throws IOException {
- if (revIds.contains(new Integer(revision.Id))) {
+ if (revIds.contains(new Long(revision.Id))) {
if (!pageWritten) {
sink.writeStartPage(currentPage);
pageWritten = true;
diff --git a/src/org/mediawiki/importer/SqlWriter14.java
b/src/org/mediawiki/importer/SqlWriter14.java
index 2585c4d..494ce5e 100644
--- a/src/org/mediawiki/importer/SqlWriter14.java
+++ b/src/org/mediawiki/importer/SqlWriter14.java
@@ -60,7 +60,7 @@
private void writeOldRevision(Page page, Revision revision) throws
IOException {
bufferInsertRow("old", new Object[][] {
- {"old_id", new Integer(revision.Id)},
+ {"old_id", new Long(revision.Id)},
{"old_namespace", page.Title.Namespace},
{"old_title", titleFormat(page.Title.Text)},
{"old_text", revision.Text == null ? "" :
revision.Text},
diff --git a/src/org/mediawiki/importer/SqlWriter15.java
b/src/org/mediawiki/importer/SqlWriter15.java
index f57473d..50873e3 100644
--- a/src/org/mediawiki/importer/SqlWriter15.java
+++ b/src/org/mediawiki/importer/SqlWriter15.java
@@ -66,7 +66,7 @@
public void writeRevision(Revision revision) throws IOException {
bufferInsertRow(traits.getTextTable(), new Object[][] {
- {"old_id", new Integer(revision.Id)},
+ {"old_id", new Long(revision.Id)},
{"old_text", revision.Text == null ? "" :
revision.Text},
{"old_flags", "utf-8"}});
@@ -76,9 +76,9 @@
if (revision.Text==null) rev_deleted |= DELETED_TEXT;
bufferInsertRow("revision", new Object[][] {
- {"rev_id", new Integer(revision.Id)},
+ {"rev_id", new Long(revision.Id)},
{"rev_page", new Integer(currentPage.Id)},
- {"rev_text_id", new Integer(revision.Id)},
+ {"rev_text_id", new Long(revision.Id)},
{"rev_comment",
commentFormat(revision.Comment)},
{"rev_user", revision.Contributor.Username ==
null ? ZERO : new Integer(revision.Contributor.Id)},
{"rev_user_text", revision.Contributor.Username
== null ? "" : revision.Contributor.Username},
@@ -100,7 +100,7 @@
{"page_is_new", ZERO},
{"page_random", traits.getRandom()},
{"page_touched", traits.getCurrentTime()},
- {"page_latest", new Integer(revision.Id)},
+ {"page_latest", new Long(revision.Id)},
{"page_len", revision.Bytes}});
checkpoint();
}
diff --git a/src/org/mediawiki/importer/SqlWriter1_25.java
b/src/org/mediawiki/importer/SqlWriter1_25.java
index a24696c..54d09e8 100644
--- a/src/org/mediawiki/importer/SqlWriter1_25.java
+++ b/src/org/mediawiki/importer/SqlWriter1_25.java
@@ -32,11 +32,11 @@
public SqlWriter1_25(SqlWriter.Traits tr, SqlStream output) {
super(tr, output);
}
-
+
public SqlWriter1_25(SqlWriter.Traits tr, SqlStream output, String
prefix) {
super(tr, output, prefix);
}
-
+
protected void updatePage(Page page, Revision revision) throws
IOException {
bufferInsertRow("page", new Object[][] {
{"page_id", new Integer(page.Id)},
@@ -47,7 +47,7 @@
{"page_is_new", ZERO},
{"page_random", traits.getRandom()},
{"page_touched", traits.getCurrentTime()},
- {"page_latest", new Integer(revision.Id)},
+ {"page_latest", new Long(revision.Id)},
{"page_len", revision.Bytes},
{"page_content_model", revision.Model},
});
@@ -64,32 +64,32 @@
public void writeRevision(Revision revision) throws IOException {
bufferInsertRow(traits.getTextTable(), new Object[][] {
- {"old_id", new Integer(revision.Id)},
+ {"old_id", new Long(revision.Id)},
{"old_text", revision.Text == null ? "" :
revision.Text},
{"old_flags", "utf-8"}});
-
+
int rev_deleted = 0;
if (revision.Contributor.Username==null) rev_deleted |=
DELETED_USER;
if (revision.Comment==null) rev_deleted |= DELETED_COMMENT;
if (revision.Text==null) rev_deleted |= DELETED_TEXT;
bufferInsertRow("revision", new Object[][] {
- {"rev_id", new Integer(revision.Id)},
+ {"rev_id", new Long(revision.Id)},
{"rev_page", new Integer(currentPage.Id)},
- {"rev_text_id", new Integer(revision.Id)},
+ {"rev_text_id", new Long(revision.Id)},
{"rev_comment",
commentFormat(revision.Comment)},
{"rev_user", revision.Contributor.Username ==
null ? ZERO : new Integer(revision.Contributor.Id)},
{"rev_user_text", revision.Contributor.Username
== null ? "" : revision.Contributor.Username},
{"rev_timestamp",
timestampFormat(revision.Timestamp)},
{"rev_minor_edit", revision.Minor ? ONE : ZERO},
- {"rev_parent_id", revision.Parentid == 0 ? null
: new Integer(revision.Parentid)},
+ {"rev_parent_id", revision.Parentid == 0 ? null
: new Long(revision.Parentid)},
{"rev_sha1", revision.Sha1},
{"rev_content_model", revision.Model},
{"rev_content_format", revision.Format},
{"rev_deleted", rev_deleted==0 ? ZERO : new
Integer(rev_deleted)},
{"rev_len", revision.Bytes},
});
-
+
lastRevision = revision;
}
}
diff --git a/src/org/mediawiki/importer/XmlDumpReader.java
b/src/org/mediawiki/importer/XmlDumpReader.java
index d5df43e..506078a 100644
--- a/src/org/mediawiki/importer/XmlDumpReader.java
+++ b/src/org/mediawiki/importer/XmlDumpReader.java
@@ -351,7 +351,7 @@
}
void readId() {
- int id = Integer.parseInt(bufferContents());
+ long id = Long.parseLong(bufferContents());
if (contrib != null)
contrib.Id = id;
else if (rev != null)
@@ -404,7 +404,7 @@
}
void readParentid() {
- rev.Parentid = Integer.parseInt(bufferContents());
+ rev.Parentid = Long.parseLong(bufferContents());
}
void readTimestamp() {
diff --git a/src/org/mediawiki/importer/XmlDumpWriter0_10.java
b/src/org/mediawiki/importer/XmlDumpWriter0_10.java
index afca381..69bd5e9 100644
--- a/src/org/mediawiki/importer/XmlDumpWriter0_10.java
+++ b/src/org/mediawiki/importer/XmlDumpWriter0_10.java
@@ -122,9 +122,9 @@
XmlWriter writer = this.writer;
writer.openElement("revision");
// FIXME: We're writing "0" here if the input dump didn't
include IDs.
- writer.textElement("id", Integer.toString(rev.Id));
+ writer.textElement("id", Long.toString(rev.Id));
if (rev.Parentid != 0) {
- writer.textElement("parentid",
Integer.toString(rev.Parentid));
+ writer.textElement("parentid",
Long.toString(rev.Parentid));
}
writer.textElement("timestamp", formatTimestamp(rev.Timestamp));
diff --git a/src/org/mediawiki/importer/XmlDumpWriter0_3.java
b/src/org/mediawiki/importer/XmlDumpWriter0_3.java
index 40cdbb5..1d57df5 100644
--- a/src/org/mediawiki/importer/XmlDumpWriter0_3.java
+++ b/src/org/mediawiki/importer/XmlDumpWriter0_3.java
@@ -107,7 +107,7 @@
XmlWriter writer = this.writer;
writer.openElement("revision");
if (rev.Id != 0)
- writer.textElement("id", Integer.toString(rev.Id));
+ writer.textElement("id", Long.toString(rev.Id));
writer.textElement("timestamp", formatTimestamp(rev.Timestamp));
--
To view, visit https://gerrit.wikimedia.org/r/192739
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ibfa4564d3b5a00a61546600977f2074085012c49
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/tools/mwdumper
Gerrit-Branch: master
Gerrit-Owner: Awight <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits