Wpmirrordev has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/171976

Change subject: Extend maximum allowed mediawiki version to 1.24
......................................................................

Extend maximum allowed mediawiki version to 1.24

Change-Id: Ie6d6d6b962f460e4cacaacf60f57992213ca723a

Extend maximum allowed XML dump schema to 0.10

Change-Id: I8993f3a6058b447b023cb66e3246805175e28b4a
---
M xmlfileutils/CHANGELOG
M xmlfileutils/mwxml2sql.c
M xmlfileutils/mwxml2sql.h
M xmlfileutils/mwxmlelts.c
M xmlfileutils/sqlutils.c
5 files changed, 61 insertions(+), 10 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/dumps refs/changes/76/171976/1

diff --git a/xmlfileutils/CHANGELOG b/xmlfileutils/CHANGELOG
index 1ac5c8a..0e54ec1 100644
--- a/xmlfileutils/CHANGELOG
+++ b/xmlfileutils/CHANGELOG
@@ -1,4 +1,16 @@
-29 May 2013 Kent L. Miller <[email protected]>
+06 Nov 2014 WP-MIRROR <[email protected]>
+
+   Release 0.0.2.24-1
+
+    * mwxml2sql.h:
+      Schema: define DBNAME, update struct siteinfo_t
+    * mwxml2sql.c:
+      Schema: Extend mediawiki version to 1.24
+      Schema: Extend XML data dump file schema to 0.10
+    * mwxmlelts.c:
+      Schema: get <dbname> element
+
+29 May 2013 WP-MIRROR <[email protected]>
 
    Release 0.0.2-1
 
diff --git a/xmlfileutils/mwxml2sql.c b/xmlfileutils/mwxml2sql.c
index 729dc3d..409e9d3 100644
--- a/xmlfileutils/mwxml2sql.c
+++ b/xmlfileutils/mwxml2sql.c
@@ -61,8 +61,8 @@
 "with this program.  If not, see <http://www.gnu.org/licenses/>\n\n"
 "Written by Ariel T. Glenn.\n";
   fprintf(stderr,"mwxml2sql %s\n", version_string);
-  fprintf(stderr,"Supported input schema versions: 0.4 through 0.8.\n");
-  fprintf(stderr,"Supported output MediaWiki versions: 1.5 through 1.21.\n\n");
+  fprintf(stderr,"Supported input schema versions: 0.4 through 0.10.\n");
+  fprintf(stderr,"Supported output MediaWiki versions: 1.5 through 1.24.\n\n");
   fprintf(stderr,"%s",copyright);
   exit(-1);
 }
@@ -120,9 +120,9 @@
     comma = strchr(start, ',');
     if (comma) *comma = '\0';
     else last++;
-    /* we know MW 1.5 through MW 1.21 even though there is no MW 1.21 yet */
+    /* we know MW 1.5 through MW 1.24 */
     sscanf(start, "%u.%u%20s", &mwv->major, &mwv->minor, mwv->qualifier);
-    if (mwv->major != 1 || mwv->minor < 5 || mwv->minor > 21) {
+    if (mwv->major != 1 || mwv->minor < 5 || mwv->minor > 24) {
       free_mw_version(mwv);
       return(NULL);
     }
diff --git a/xmlfileutils/mwxml2sql.h b/xmlfileutils/mwxml2sql.h
index 1223d6b..d7d2ee1 100644
--- a/xmlfileutils/mwxml2sql.h
+++ b/xmlfileutils/mwxml2sql.h
@@ -59,6 +59,7 @@
 
 typedef struct {
   char sitename[FIELD_LEN];
+  char dbname[FIELD_LEN];
   char base[FIELD_LEN];
   char generator[FIELD_LEN];
   char s_case[FIELD_LEN];
@@ -151,6 +152,7 @@
 #define CASE "case"
 #define COMMENT "comment"
 #define CONTRIBUTOR "contributor"
+#define DBNAME "dbname"
 #define FORMAT "format"
 #define GENERATOR "generator"
 #define ID "id"
diff --git a/xmlfileutils/mwxmlelts.c b/xmlfileutils/mwxmlelts.c
index df8ac36..d1526d9 100644
--- a/xmlfileutils/mwxmlelts.c
+++ b/xmlfileutils/mwxmlelts.c
@@ -643,6 +643,22 @@
     }
   }
 
+  /* schema 0.10 and later have model and format then text */
+  r.model[0] = '\0';
+  r.format[0] = '\0';
+  if (get_elt_with_attrs(stubs, MODEL, r.model, sizeof(r.model), NULL, 0) != -1) {
+    if (get_line(stubs) == NULL) {
+      whine("abrupt end of revision data in rev id %s", r.id);
+      return(0);
+    }
+  }
+  if (get_elt_with_attrs(stubs, FORMAT, r.format, sizeof(r.format), NULL, 0) != -1) {
+    if (get_line(stubs) == NULL) {
+      whine("abrupt end of revision data in rev id %s", r.id);
+      return(0);
+    }
+  }
+
   /*       <text id="382338088" bytes="57" />  */
   get_elt_with_attrs(stubs, TEXT, NULL, 0, attrs, MAX_ATTRS_STR_LEN);
 
@@ -673,6 +689,8 @@
       strcpy(r.text_len, "0");
       break;
     }
+    else if (!strcmp(name, "xml:space"))
+      result = result; /* do nothing */
     else {
       whine("unknown attribute in text tag");
       break;
@@ -686,9 +704,6 @@
     return(0);
   }
 
-  r.model[0] = '\0';
-  r.format[0] = '\0';
-
   /* schema 0.8 and later have sha1 here after text */
   if (! r.sha1[0]) {
     if (get_elt_with_attrs(stubs, SHA1, r.sha1, sizeof(r.sha1), NULL, 0) != -1) {
@@ -698,7 +713,7 @@
       }
     }
   }
-  /* schema 0.8 and later have model and format */
+  /* schema 0.8 has model and format after text */
   if (get_elt_with_attrs(stubs, MODEL, r.model, sizeof(r.model), NULL, 0) != -1) {
     if (get_line(stubs) == NULL) {
       whine("abrupt end of revision data in rev id %s", r.id);
@@ -1067,6 +1082,9 @@
     snprintf(out_buf, sizeof(out_buf), ", page_content_model");
     write_if_mwv(sqlp, 1,20,0,0,out_buf, verbose);
 
+    snprintf(out_buf, sizeof(out_buf), ", page_links_updated");
+    write_if_mwv(sqlp, 1,22,0,0,out_buf, verbose);
+
     strcpy(out_buf, ") VALUES\n");
     put_line_all(sqlp, out_buf);
 
@@ -1090,6 +1108,12 @@
 
   copy_sql_field(out_buf, p.model[0]?p.model:NULL, 1, 1);
   write_if_mwv(sqlp, 1, 20, 0, 0, out_buf, verbose);
+
+  strcpy(out_buf, ", ");
+  write_if_mwv(sqlp, 1, 22, 0, 0, out_buf, verbose);
+
+  copy_sql_field(out_buf, p.model[0]?p.model:NULL, 1, 1);
+  write_if_mwv(sqlp, 1, 22, 0, 0, out_buf, verbose);
 
   if (page_rows_written == MAX_PAGE_BATCH) {
     strcpy(out_buf,");\nCOMMIT;\n");
@@ -1295,8 +1319,9 @@
 /*
   <siteinfo>
     <sitename>Wikipedia</sitename>
+    <dbname>enwiki</dbname>                            --new in 0.9
     <base>http://en.wikipedia.org/wiki/Main_Page</base>
-    <generator>MediaWiki 1.21wmf6</generator>
+    <generator>MediaWiki 1.23wmf10</generator>
     <case>first-letter</case>
     <namespaces>
       <namespace key="-2" case="first-letter">Media</namespace>
@@ -1352,6 +1377,14 @@
     whine("abrupt end to siteinfo");
     return(0);
   }
+
+  /* this first appears in schema 0.9 */
+  if (get_elt_with_attrs(f, DBNAME, s->base, sizeof(s->dbname), NULL, 0) != -1) {
+    if (get_line(f) == NULL) {
+      whine("abrupt end to siteinfo");
+      return(0);
+    }
+  }
   result = get_elt_with_attrs(f, BASE, s->base, sizeof(s->base), NULL, 0);
 
   if (get_line(f) == NULL) {
diff --git a/xmlfileutils/sqlutils.c b/xmlfileutils/sqlutils.c
index 284501d..0ee9315 100644
--- a/xmlfileutils/sqlutils.c
+++ b/xmlfileutils/sqlutils.c
@@ -555,6 +555,10 @@
       snprintf(out_buf, sizeof(out_buf), "`page_content_model` varbinary(32) DEFAULT NULL,\n");
       put_line(f, out_buf);
     }
+    if (MWV_GREATER(mwv, 1, 22)) {
+      snprintf(out_buf, sizeof(out_buf), "`page_links_updated` varbinary(14) DEFAULT NULL,\n");
+      put_line(f, out_buf);
+    }
     snprintf(out_buf, sizeof(out_buf), "PRIMARY KEY (`page_id`),\n");
     put_line(f, out_buf);
     snprintf(out_buf, sizeof(out_buf), "UNIQUE KEY `name_title` (`page_namespace`,`page_title`),\n");

-- 
To view, visit https://gerrit.wikimedia.org/r/171976
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I8993f3a6058b447b023cb66e3246805175e28b4a
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps
Gerrit-Branch: ariel
Gerrit-Owner: Wpmirrordev <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to