Wpmirrordev has uploaded a new change for review. https://gerrit.wikimedia.org/r/171976
Change subject: Extend maximum allowed mediawiki version to 1.24 ...................................................................... Extend maximum allowed mediawiki version to 1.24 Change-Id: Ie6d6d6b962f460e4cacaacf60f57992213ca723a Extend maximum allowed XML dump schema to 0.10 Change-Id: I8993f3a6058b447b023cb66e3246805175e28b4a --- M xmlfileutils/CHANGELOG M xmlfileutils/mwxml2sql.c M xmlfileutils/mwxml2sql.h M xmlfileutils/mwxmlelts.c M xmlfileutils/sqlutils.c 5 files changed, 61 insertions(+), 10 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/dumps refs/changes/76/171976/1 diff --git a/xmlfileutils/CHANGELOG b/xmlfileutils/CHANGELOG index 1ac5c8a..0e54ec1 100644 --- a/xmlfileutils/CHANGELOG +++ b/xmlfileutils/CHANGELOG @@ -1,4 +1,16 @@ -29 May 2013 Kent L. Miller <[email protected]> +06 Nov 2014 WP-MIRROR <[email protected]> + + Release 0.0.2.24-1 + + * mwxml2sql.h: + Schema: define DBNAME, update struct siteinfo_t + * mwxml2sql.c: + Schema: Extend mediawiki version to 1.24 + Schema: Extend XML data dump file schema to 0.10 + * mwxmlelts.c: + Schema: get <dbname> element + +29 May 2013 WP-MIRROR <[email protected]> Release 0.0.2-1 diff --git a/xmlfileutils/mwxml2sql.c b/xmlfileutils/mwxml2sql.c index 729dc3d..409e9d3 100644 --- a/xmlfileutils/mwxml2sql.c +++ b/xmlfileutils/mwxml2sql.c @@ -61,8 +61,8 @@ "with this program. If not, see <http://www.gnu.org/licenses/>\n\n" "Written by Ariel T. Glenn.\n"; fprintf(stderr,"mwxml2sql %s\n", version_string); - fprintf(stderr,"Supported input schema versions: 0.4 through 0.8.\n"); - fprintf(stderr,"Supported output MediaWiki versions: 1.5 through 1.21.\n\n"); + fprintf(stderr,"Supported input schema versions: 0.4 through 0.10.\n"); + fprintf(stderr,"Supported output MediaWiki versions: 1.5 through 1.24.\n\n"); fprintf(stderr,"%s",copyright); exit(-1); } @@ -120,9 +120,9 @@ comma = strchr(start, ','); if (comma) *comma = '\0'; else last++; - /* we know MW 1.5 through MW 1.21 even though there is no MW 1.21 yet */ + /* we know MW 1.5 through MW 1.24 */ sscanf(start, "%u.%u%20s", &mwv->major, &mwv->minor, mwv->qualifier); - if (mwv->major != 1 || mwv->minor < 5 || mwv->minor > 21) { + if (mwv->major != 1 || mwv->minor < 5 || mwv->minor > 24) { free_mw_version(mwv); return(NULL); } diff --git a/xmlfileutils/mwxml2sql.h b/xmlfileutils/mwxml2sql.h index 1223d6b..d7d2ee1 100644 --- a/xmlfileutils/mwxml2sql.h +++ b/xmlfileutils/mwxml2sql.h @@ -59,6 +59,7 @@ typedef struct { char sitename[FIELD_LEN]; + char dbname[FIELD_LEN]; char base[FIELD_LEN]; char generator[FIELD_LEN]; char s_case[FIELD_LEN]; @@ -151,6 +152,7 @@ #define CASE "case" #define COMMENT "comment" #define CONTRIBUTOR "contributor" +#define DBNAME "dbname" #define FORMAT "format" #define GENERATOR "generator" #define ID "id" diff --git a/xmlfileutils/mwxmlelts.c b/xmlfileutils/mwxmlelts.c index df8ac36..d1526d9 100644 --- a/xmlfileutils/mwxmlelts.c +++ b/xmlfileutils/mwxmlelts.c @@ -643,6 +643,22 @@ } } + /* schema 0.10 and later have model and format then text */ + r.model[0] = '\0'; + r.format[0] = '\0'; + if (get_elt_with_attrs(stubs, MODEL, r.model, sizeof(r.model), NULL, 0) != -1) { + if (get_line(stubs) == NULL) { + whine("abrupt end of revision data in rev id %s", r.id); + return(0); + } + } + if (get_elt_with_attrs(stubs, FORMAT, r.format, sizeof(r.format), NULL, 0) != -1) { + if (get_line(stubs) == NULL) { + whine("abrupt end of revision data in rev id %s", r.id); + return(0); + } + } + /* <text id="382338088" bytes="57" /> */ get_elt_with_attrs(stubs, TEXT, NULL, 0, attrs, MAX_ATTRS_STR_LEN); @@ -673,6 +689,8 @@ strcpy(r.text_len, "0"); break; } + else if (!strcmp(name, "xml:space")) + result = result; /* do nothing */ else { whine("unknown attribute in text tag"); break; @@ -686,9 +704,6 @@ return(0); } - r.model[0] = '\0'; - r.format[0] = '\0'; - /* schema 0.8 and later have sha1 here after text */ if (! r.sha1[0]) { if (get_elt_with_attrs(stubs, SHA1, r.sha1, sizeof(r.sha1), NULL, 0) != -1) { @@ -698,7 +713,7 @@ } } } - /* schema 0.8 and later have model and format */ + /* schema 0.8 has model and format after text */ if (get_elt_with_attrs(stubs, MODEL, r.model, sizeof(r.model), NULL, 0) != -1) { if (get_line(stubs) == NULL) { whine("abrupt end of revision data in rev id %s", r.id); @@ -1067,6 +1082,9 @@ snprintf(out_buf, sizeof(out_buf), ", page_content_model"); write_if_mwv(sqlp, 1,20,0,0,out_buf, verbose); + snprintf(out_buf, sizeof(out_buf), ", page_links_updated"); + write_if_mwv(sqlp, 1,22,0,0,out_buf, verbose); + strcpy(out_buf, ") VALUES\n"); put_line_all(sqlp, out_buf); @@ -1090,6 +1108,12 @@ copy_sql_field(out_buf, p.model[0]?p.model:NULL, 1, 1); write_if_mwv(sqlp, 1, 20, 0, 0, out_buf, verbose); + + strcpy(out_buf, ", "); + write_if_mwv(sqlp, 1, 22, 0, 0, out_buf, verbose); + + copy_sql_field(out_buf, p.model[0]?p.model:NULL, 1, 1); + write_if_mwv(sqlp, 1, 22, 0, 0, out_buf, verbose); if (page_rows_written == MAX_PAGE_BATCH) { strcpy(out_buf,");\nCOMMIT;\n"); @@ -1295,8 +1319,9 @@ /* <siteinfo> <sitename>Wikipedia</sitename> + <dbname>enwiki</dbname> --new in 0.9 <base>http://en.wikipedia.org/wiki/Main_Page</base> - <generator>MediaWiki 1.21wmf6</generator> + <generator>MediaWiki 1.23wmf10</generator> <case>first-letter</case> <namespaces> <namespace key="-2" case="first-letter">Media</namespace> @@ -1352,6 +1377,14 @@ whine("abrupt end to siteinfo"); return(0); } + + /* this first appears in schema 0.9 */ + if (get_elt_with_attrs(f, DBNAME, s->base, sizeof(s->dbname), NULL, 0) != -1) { + if (get_line(f) == NULL) { + whine("abrupt end to siteinfo"); + return(0); + } + } result = get_elt_with_attrs(f, BASE, s->base, sizeof(s->base), NULL, 0); if (get_line(f) == NULL) { diff --git a/xmlfileutils/sqlutils.c b/xmlfileutils/sqlutils.c index 284501d..0ee9315 100644 --- a/xmlfileutils/sqlutils.c +++ b/xmlfileutils/sqlutils.c @@ -555,6 +555,10 @@ snprintf(out_buf, sizeof(out_buf), "`page_content_model` varbinary(32) DEFAULT NULL,\n"); put_line(f, out_buf); } + if (MWV_GREATER(mwv, 1, 22)) { + snprintf(out_buf, sizeof(out_buf), "`page_links_updated` varbinary(14) DEFAULT NULL,\n"); + put_line(f, out_buf); + } snprintf(out_buf, sizeof(out_buf), "PRIMARY KEY (`page_id`),\n"); put_line(f, out_buf); snprintf(out_buf, sizeof(out_buf), "UNIQUE KEY `name_title` (`page_namespace`,`page_title`),\n"); -- To view, visit https://gerrit.wikimedia.org/r/171976 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I8993f3a6058b447b023cb66e3246805175e28b4a Gerrit-PatchSet: 1 Gerrit-Project: operations/dumps Gerrit-Branch: ariel Gerrit-Owner: Wpmirrordev <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
